Browse Source

correct BVH

master
Dtouch 2 years ago
parent
commit
0b882462e4
  1. 11
      CMakeLists.txt
  2. 29
      include/device/Nurbs/bvh.cuh
  3. 6
      include/device/Nurbs/nurbs_common.cuh
  4. 6
      include/device/Nurbs/nurbs_curve.cuh
  5. 18
      include/device/Nurbs/nurbs_surface.cuh
  6. 28
      include/device/aabb.cuh
  7. 6
      include/device/device_utils.cuh
  8. 19
      include/device/vec.cuh
  9. 6
      include/utils.h
  10. 137
      src/device/Nurbs/bvh.cu
  11. 7
      src/device/Nurbs/nurbs_common.cu
  12. 6
      src/device/Nurbs/nurbs_curve.cu
  13. 158
      src/device/Nurbs/nurbs_surface.cu
  14. 42
      src/device/aabb.cu
  15. 2
      src/device/device_utils.cu
  16. 13
      src/device/vec.cu
  17. 38
      src/main.cpp
  18. 16
      src/utils.cpp

11
CMakeLists.txt

@ -3,7 +3,16 @@ project(NurbsEvaluator LANGUAGES CXX CUDA)
set(CMAKE_CUDA_STANDARD 14)
add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h src/device/NurbsCommon.cu include/device/NurbsCommon.cuh src/device/DeviceUtils.cu include/device/DeviceUtils.cuh src/device/NurbsSurface.cu include/device/NurbsSurface.cuh src/device/NurbsCurve.cu include/device/NurbsCurve.cuh)
add_executable(NurbsEvaluator src/main.cpp
src/utils.cpp include/utils.h
src/device/Nurbs/nurbs_common.cu include/device/Nurbs/nurbs_common.cuh
src/device/device_utils.cu include/device/device_utils.cuh
src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh
src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh
src/device/aabb.cu include/device/aabb.cuh
src/device/vec.cu include/device/vec.cuh
src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh
)
#add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
#add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

29
include/device/Nurbs/bvh.cuh

@ -0,0 +1,29 @@
//
// Created by 14727 on 2022/12/11.
//
#ifndef NURBSEVALUATOR_BVH_CUH
#define NURBSEVALUATOR_BVH_CUH
#include "../aabb.cuh"
struct BVHNode {
AABB bounds; // AABB包围盒
int firstChild, level, childNum = 4; // 第一个孩子节点的下标,该结点所在层次,孩子个数
// 曲面片u, v的范围
float u0, u1, v0, v1;
};
class BVH {
public:
int maxLevel, size;
BVHNode* nodes = nullptr;
__host__ void printQuadTree();
};
__global__ void
buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float d_lastKnot_u, float d_lastKnot_v,
int d_sampleCnt_u, int d_sampleCnt_v, BVHNode *BVH);
#endif //NURBSEVALUATOR_BVH_CUH

6
include/device/NurbsCommon.cuh → include/device/Nurbs/nurbs_common.cuh

@ -2,8 +2,8 @@
// Created by 14727 on 2022/11/19.
//
#ifndef NURBSEVALUATOR_NURBSCOMMON_CUH
#define NURBSEVALUATOR_NURBSCOMMON_CUH
#ifndef NURBSEVALUATOR_NURBS_COMMON_CUH
#define NURBSEVALUATOR_NURBS_COMMON_CUH
/**
* 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值
@ -29,4 +29,4 @@ __global__ void
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt,
int d_sampleCnt);
#endif //NURBSEVALUATOR_NURBSCOMMON_CUH
#endif //NURBSEVALUATOR_NURBS_COMMON_CUH

6
include/device/NurbsCurve.cuh → include/device/Nurbs/nurbs_curve.cuh

@ -2,8 +2,8 @@
// Created by 14727 on 2022/12/9.
//
#ifndef NURBSEVALUATOR_NURBSCURVE_CUH
#define NURBSEVALUATOR_NURBSCURVE_CUH
#ifndef NURBSEVALUATOR_NURBS_CURVE_CUH
#define NURBSEVALUATOR_NURBS_CURVE_CUH
#include <cuda_runtime.h>
#include <vector>
@ -70,4 +70,4 @@ namespace NurbsCurve {
}
#endif //NURBSEVALUATOR_NURBSCURVE_CUH
#endif //NURBSEVALUATOR_NURBS_CURVE_CUH

18
include/device/NurbsSurface.cuh → include/device/Nurbs/nurbs_surface.cuh

@ -2,10 +2,11 @@
// Created by 14727 on 2022/12/9.
//
#ifndef NURBSEVALUATOR_NURBSSURFACE_CUH
#define NURBSEVALUATOR_NURBSSURFACE_CUH
#ifndef NURBSEVALUATOR_NURBS_SURFACE_CUH
#define NURBSEVALUATOR_NURBS_SURFACE_CUH
#include <vector>
#include "cuda_runtime.h"
#include "bvh.cuh"
namespace NurbsSurface {
const int POINT_SIZE = 4;
@ -25,7 +26,7 @@ namespace NurbsSurface {
float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v);
__global__ static void
g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v);
g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v, float *ms, float*k);
class Surface {
private:
@ -42,7 +43,11 @@ namespace NurbsSurface {
float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵
float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵
float *d_evaluationRes; // evaluation结果
float *d_derivatives; // 一阶导计算结果
float *d_k; // 最大曲率
BVH bvh;
public:
/**
@ -73,6 +78,11 @@ namespace NurbsSurface {
*/
__host__ void curvature(int sampleCnt_u, int sampleCnt_v);
/**
* 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法
*/
__host__ void buildBHV(int sampleCnt_u, int sampleCnt_v);
void setRecordTime(bool r);
~Surface();
@ -82,4 +92,4 @@ namespace NurbsSurface {
#endif //NURBSEVALUATOR_NURBSSURFACE_CUH
#endif //NURBSEVALUATOR_NURBS_SURFACE_CUH

28
include/device/aabb.cuh

@ -0,0 +1,28 @@
//
// Created by 14727 on 2022/12/11.
//
#ifndef NURBSEVALUATOR_AABB_CUH
#define NURBSEVALUATOR_AABB_CUH
#include "vec.cuh"
#include "cstdio"
class AABB {
public:
// 边界
FVec3 pMin, pMax;
__device__ __host__ AABB();
__device__ __host__ explicit AABB(const FVec3& p);
__device__ __host__ AABB(const FVec3& p1, const FVec3& p2);
// aabb包围盒合并操作,包围盒和点
__device__ __host__ AABB Union(const FVec3& p);
// aabb包围盒合并操作,两个包围盒
__device__ __host__ AABB Union(const AABB& b2);
// 判断两个aabb包围盒是否重叠
__device__ __host__ bool IsOverlap(const AABB& b2);
};
#endif //NURBSEVALUATOR_AABB_CUH

6
include/device/DeviceUtils.cuh → include/device/device_utils.cuh

@ -2,8 +2,8 @@
// Created by 14727 on 2022/12/9.
//
#ifndef NURBSEVALUATOR_DEVICEUTILS_CUH
#define NURBSEVALUATOR_DEVICEUTILS_CUH
#ifndef NURBSEVALUATOR_DEVICE_UTILS_CUH
#define NURBSEVALUATOR_DEVICE_UTILS_CUH
__device__ void d_safeFree(float * &p);
@ -20,4 +20,4 @@ __device__ bool d_floatEqual(float a, float b);
__device__ void normalization(float &a, float &b, float &c);
#endif //NURBSEVALUATOR_DEVICEUTILS_CUH
#endif //NURBSEVALUATOR_DEVICE_UTILS_CUH

19
include/device/vec.cuh

@ -0,0 +1,19 @@
//
// Created by 14727 on 2022/12/11.
//
#ifndef NURBSEVALUATOR_VEC_CUH
#define NURBSEVALUATOR_VEC_CUH
#include "cuda_runtime.h"
class FVec3 {
public:
float x, y, z;
__device__ __host__ FVec3(float x_, float y_, float z_);
__device__ __host__ FVec3(FVec3 const &fVec3);
__device__ __host__ FVec3();
};
#endif //NURBSEVALUATOR_VEC_CUH

6
include/utils.h

@ -9,6 +9,8 @@
double get_time();
#else
#include <windows.h>
#include "device/Nurbs/bvh.cuh"
double get_time();
#endif
@ -17,6 +19,10 @@ double get_time();
*
*/
void safeCudaFree(float *&p);
void safeCudaFree(BVHNode *&p);
//template<typename T>
void safeFree(float *&p);
void safeFree(BVHNode *&p);
#endif //UNTITLED1_UTILS_H

137
src/device/Nurbs/bvh.cu

@ -0,0 +1,137 @@
//
// Created by 14727 on 2022/12/11.
//
#include "../../../include/device/Nurbs/bvh.cuh"
/**
* 当前层的第一个节点的坐标
* @param layer 层号。根节点为第一层
*/
__device__ int getStartIdxOfLayerN(int layer) {
if (layer == 1) return 0;
int num = 1;
// 找规律得出
for (int i = 2; i < layer; i++) {
num = num * 4 + 1;
}
return num;
}
/**
* 根据u、v参数域中的小矩形的位置,判断它在BVH叶节点层(也就是最后一层)中的位置
* @param ix
* @param iy
* @return
*/
__device__ int getChildNodeIdx(int ix, int iy) {
int idx = 0;
while(ix > 0) {
int log2XCeil = int(__log2f(ix));
idx += int(pow(4, log2XCeil));
ix -= int(pow(2, log2XCeil));
}
while(iy > 0) {
int log2XCeil = int(__log2f(iy));
idx += 2 * int(pow(4, log2XCeil));
iy -= int(pow(2, log2XCeil));
}
return idx;
}
__global__ void
buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) {
// 假设还是二维grid和二维的block
int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= sampleCnt_u - 1 || iy >= sampleCnt_v - 1) {
return;
}
float singleStepVal_u = lastKnot_u / (sampleCnt_u - 1);
float singleStepVal_v = lastKnot_v / (sampleCnt_v - 1);
float u = singleStepVal_u * ix, v = singleStepVal_v * iy;
int bias = getChildNodeIdx(ix, iy); // 每层的节点在总数组中的编号相对该层第一个节点的偏置
for (int step = 1; step <= sampleCnt_u - 1 || step <= sampleCnt_v - 1; step = step * 2, --level) {
float step_u = min(step, sampleCnt_u - 1);
float step_v = min(step, sampleCnt_v - 1);
int baseIdx = getStartIdxOfLayerN(level); // 当前层的第一个节点的坐标
int idx = baseIdx + bias;
int tmpIdx = (ix * sampleCnt_v + iy) * 3;
// AABB bound(FVec3(evaluatedPoints[tmpIdx] + *k, evaluatedPoints[tmpIdx + 1] + *k,
// evaluatedPoints[tmpIdx + 2] + *k)); // 最初只包含u,v点
AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1],
evaluatedPoints[tmpIdx + 2])); // 最初只包含u,v点
if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]);
if (step == 1) {
// 叶子节点
BVH[idx].u0 = u;
BVH[idx].u1 = u + step_u * singleStepVal_u;
BVH[idx].v0 = v;
BVH[idx].v1 = v + step_v * singleStepVal_v;
BVH[idx].level = level;
// int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3;
// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1] + *k, evaluatedPoints[tmpIdx1 + 1] + *k,
// evaluatedPoints[tmpIdx1 + 2] + *k));
// int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2] + *k, evaluatedPoints[tmpIdx2 + 1] + *k,
// evaluatedPoints[tmpIdx2 + 2] + *k));
// int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3] + *k, evaluatedPoints[tmpIdx3 + 1] + *k,
// evaluatedPoints[tmpIdx3 + 2] + *k));
int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3;
bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1],
evaluatedPoints[tmpIdx1 + 2]));
int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1],
evaluatedPoints[tmpIdx2 + 2]));
int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1],
evaluatedPoints[tmpIdx3 + 2]));
BVH[idx].bounds = bound;
BVH[idx].childNum = -1;
} else {
if (ix % step == 0 || iy % step == 0) {
// 非叶子节点
BVH[idx].u0 = u;
BVH[idx].u1 = u + step_u * singleStepVal_u;
BVH[idx].v0 = v;
BVH[idx].v1 = v + step_v * singleStepVal_v;
BVH[idx].level = level;
int firstChild = 4 * idx + 1;
bound = bound.Union(BVH[firstChild].bounds);
bound = bound.Union(BVH[firstChild + 1].bounds);
bound = bound.Union(BVH[firstChild + 2].bounds);
bound = bound.Union(BVH[firstChild + 3].bounds);
BVH[idx].firstChild = firstChild;
BVH[idx].bounds = bound;
}
}
bias /= 4;
__syncthreads();
}
}
__host__ void BVH::printQuadTree() {
for (int l = 1, levelNodesCnt = 1, baseIdx = 0; l <= maxLevel; l++, levelNodesCnt *= 4, baseIdx = baseIdx * 4 + 1) {
for (int biasIdx = 0; biasIdx < levelNodesCnt; biasIdx++) {
int idx = baseIdx + biasIdx;
auto pMax = nodes[idx].bounds.pMax;
auto pMin = nodes[idx].bounds.pMin;
printf("<<%g, %g, %g>,<%g, %g, %g>> ", pMin.x, pMin.y, pMin.z, pMax.x, pMax.y, pMax.z);
}
printf("\n =============$$$$$$$============= \n");
}
}

7
src/device/NurbsCommon.cu → src/device/Nurbs/nurbs_common.cu

@ -2,8 +2,9 @@
// Created by 14727 on 2022/11/19.
//
#include "../../include/device/NurbsCommon.cuh"
#include "../../include/device/DeviceUtils.cuh"
#include "../../../include/device/Nurbs/nurbs_common.cuh"
#include "../../../include/device/device_utils.cuh"
#include <cstdio>
__device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {
int m = d_knotsCnt - 1;
@ -45,7 +46,7 @@ __global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d
for (int i = 0; i < d_pointsCnt; i++) {
nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i];
nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i];
// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]);
// if(idx == 3)printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]);
}
nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) +
d_pointsCnt]; // nTexture1多记录一列数据

6
src/device/NurbsCurve.cu → src/device/Nurbs/nurbs_curve.cu

@ -1,9 +1,9 @@
//
// Created by 14727 on 2022/12/9.
//
#include "../../include/device/NurbsCommon.cuh"
#include "../../include/device/NurbsCurve.cuh"
#include "../../include/utils.h"
#include "../../../include/device/Nurbs/nurbs_common.cuh"
#include "../../../include/device/Nurbs/nurbs_curve.cuh"
#include "../../../include/utils.h"
__global__ void
NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt,

158
src/device/NurbsSurface.cu → src/device/Nurbs/nurbs_surface.cu

@ -2,13 +2,14 @@
// Created by 14727 on 2022/12/9.
//
#include "../../include/device/NurbsSurface.cuh"
#include "../../include/device/NurbsCommon.cuh"
#include "../../include/device/DeviceUtils.cuh"
#include "../../include/utils.h"
#include "../../../include/device/Nurbs/nurbs_surface.cuh"
#include "../../../include/device/Nurbs/nurbs_common.cuh"
#include "../../../include/utils.h"
#include "../../../include/device/Nurbs/bvh.cuh"
__global__ void
NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u,
NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points,
int d_pointsCnt_u,
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
int d_sampleCnt_v) {
@ -40,14 +41,15 @@ NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_n
y = y / sumW;
z = z / sumW;
// int baseIdx = (ix * d_sampleCnt_v + iy) * 3;
// res[baseIdx] = x;
// res[baseIdx + 1] = y;
// res[baseIdx + 2] = z;
int baseIdx = (ix * d_sampleCnt_v + iy) * 3;
res[baseIdx] = x;
res[baseIdx + 1] = y;
res[baseIdx + 2] = z;
// printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, x, y, z); // %g输出,舍弃无意义的0
}
__global__ void
NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v,
const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u,
@ -125,8 +127,8 @@ NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const
}
__global__ void
NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u,
float lastKnot_v) {
NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const int sampleCnt_v, float lastKnot_u,
float lastKnot_v, float *ms, float *k) {
// 二维grid和二维的block
int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
@ -202,20 +204,68 @@ NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleC
}
float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2;
normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu);
normalization(uvx, uvy, uvz);
normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv);
// normalization(sndPdx_uv, sndPdy_uv, sndPdz_uv);
// normalization(sndPdx_vu, sndPdy_vu, sndPdz_vu);
// normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu);
// normalization(uvx, uvy, uvz);
// normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv);
if (ix == 8 && iy == 9)
if (ix == 8 && iy == 9) {
printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu,
sndPdz_uu,
uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv);
printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu,
sndPdz_vu);
}
float m1 = max(max(sndPdx_uu, sndPdy_uu), sndPdz_uu);
float m2 = max(max(uvx, uvy), uvz);
float m3 = max(max(sndPdx_vv, sndPdy_vv), sndPdz_vv);
// __shared__ float ms[363];
ms[(ix * sampleCnt_v + iy) * 3] = m1;
ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2;
ms[(ix * sampleCnt_v + iy) * 3 + 2] = m3;
__syncthreads();
// if(ix == 1 && iy == 1) {
// for(int i = 0; i < sampleCnt_u; i++) {
// for(int j = 0; j < sampleCnt_v; j++) {
// printf("%g ", ms[(i * sampleCnt_v + j) * 3]);
// }
// printf("\n");
// }
// }
// 规约求最大值
for (int step = (sampleCnt_u + 1) / 2; step > 1; step = (step + 1) / 2) {
// step 表示现在参与计算最大值的数据的长度的一半
if (ix < step && ix + step < sampleCnt_u) {
ms[(ix * sampleCnt_v + iy) * 3] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3]);
ms[(ix * sampleCnt_v + iy) * 3 + 1] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 1]);
ms[(ix * sampleCnt_v + iy) * 3 + 2] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 2]);
}
}
for (int step = (sampleCnt_v + 1) / 2; step > 1; step = (step + 1) / 2) {
// step 表示现在参与计算最大值的数据的长度的一半
if (iy < step && iy + step < sampleCnt_v) {
ms[iy * 3] = max(ms[iy * 3], ms[(iy + step) * 3]);
ms[iy * 3 + 1] = max(ms[iy * 3 + 1], ms[(iy + step) * 3 + 1]);
ms[iy * 3 + 2] = max(ms[iy * 3 + 2], ms[(iy + step) * 3 + 2]);
}
}
__syncthreads();
int n = sampleCnt_u - 1;
int m = sampleCnt_v - 1;
*k = (ms[0] / (n * n) + 2 * ms[1] / (n * m) + ms[2] / (m * m)) / 8;
// if(ix == 1 && iy == 1)printf("%g gggg\n", ms[0]);
}
__host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<float>>> controlPoints,
std::vector<float> knots_u, std::vector<float> knots_v) {
std::vector<float> knots_u, std::vector<float> knots_v) {
this->knots_u = std::move(knots_u);
this->knots_v = std::move(knots_v);
this->controlPoints = std::move(controlPoints);
@ -227,7 +277,10 @@ __host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<floa
d_knots_u = nullptr;
d_knots_v = nullptr;
d_points = nullptr;
d_evaluationRes = nullptr;
d_derivatives = nullptr;
d_k = nullptr;
bvh.nodes = nullptr;
}
__host__ std::vector<std::vector<std::vector<float>>>
@ -274,9 +327,9 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
// 结果数组
size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float);
float *d_res;
cudaMalloc((void **)&d_res, resBytes);
auto *h_res = (float *)malloc(resBytes);
cudaMalloc((void **) &d_evaluationRes, resBytes);
auto *h_res = (float *) malloc(resBytes);
// 构造g_basisTexture线程层级
@ -297,7 +350,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
cudaDeviceSynchronize();
if (recordTime) time_cost_device = get_time();
g_evaluate <<<grid, block>>>(d_res, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE,
g_evaluate <<<grid, block>>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v,
POINT_SIZE,
knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v);
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
if (recordTime) {
@ -306,11 +360,13 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
time_cost_device);
}
cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost);
std::vector<std::vector<std::vector<float>>> res(sampleCnt_u, std::vector<std::vector<float>>(sampleCnt_v, std::vector<float>(3, 0.)));
for(int i = 0; i < sampleCnt_u; i++) {
cudaMemcpy(h_res, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost);
std::vector<std::vector<std::vector<float>>> res(sampleCnt_u, std::vector<std::vector<float>>(sampleCnt_v,
std::vector<float>(3,
0.)));
for (int i = 0; i < sampleCnt_u; i++) {
int baseIdx = i * sampleCnt_v * 3;
for(int j = 0; j < sampleCnt_v; j++) {
for (int j = 0; j < sampleCnt_v; j++) {
baseIdx += j * 3;
res[i][j][0] = h_res[baseIdx];
res[i][j][1] = h_res[baseIdx + 1];
@ -323,7 +379,6 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
safeFree(h_points);
safeFree(h_knots_u);
safeFree(h_knots_v);
safeCudaFree(d_res);
safeFree(h_res);
return res;
@ -380,8 +435,6 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v
cudaFree(d_derTexture_v);
}
__host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) {
// 先计算切向量
derivative(sampleCnt_u, sampleCnt_v);
@ -390,15 +443,22 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v)
return;
}
// 构造记录每个采样点中的最大M1、M2、M3的数组
// 这里用共享内存会更好,但使用共享内存动态分配长度总是出错(好像是因为长度过长),后续需要思考解决这个问题
float *ms = nullptr;
cudaMalloc((void **) &ms, sampleCnt_u * sampleCnt_v * 3 * sizeof(float));
cudaMalloc((void **) &d_k, sizeof(float));
// 构造线程层级
dim3 block(32, 32);
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
dim3 grid((sampleCnt_u - 1 + block.x - 1) / block.x, (sampleCnt_v - 1 + block.y - 1) / block.y);
// 记录用时
double time_cost_device;
if (recordTime) time_cost_device = get_time();
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1],
knots_v[knots_v.size() - 1]);
knots_v[knots_v.size() - 1], ms, d_k);
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
if (recordTime) {
time_cost_device = get_time() - time_cost_device;
@ -406,6 +466,7 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v)
sampleCnt_u * sampleCnt_v,
time_cost_device);
}
safeCudaFree(ms);
}
void NurbsSurface::Surface::setRecordTime(bool r) {
@ -420,5 +481,42 @@ NurbsSurface::Surface::~Surface() {
safeCudaFree(d_points);
safeCudaFree(d_knots_u);
safeCudaFree(d_knots_v);
safeCudaFree(d_k);
safeCudaFree(d_evaluationRes);
safeFree(bvh.nodes);
cudaDeviceReset();
}
}
__host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v) {
// 先计算曲率
curvature(sampleCnt_u, sampleCnt_v);
if (POINT_SIZE != controlPoints[0][0].size()) {
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
return;
}
// 构造线程层级
dim3 block(32, 32);
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1);
bvh.size = (pow(4, bvh.maxLevel) - 1) / 3; // 等比数列求和公示求出数总的节点数
size_t bvhBytes = sizeof(BVHNode) * bvh.size;
BVHNode *d_bvh = nullptr;
cudaMalloc((void **) &d_bvh, bvhBytes);
// 记录用时
double time_cost_device = get_time();
buildSurfaceBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
cudaDeviceSynchronize();
if (recordTime) {
time_cost_device = get_time() - time_cost_device;
printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n",
sampleCnt_u * sampleCnt_v,
time_cost_device);
}
if (bvh.nodes == nullptr) bvh.nodes = (BVHNode *) malloc(bvhBytes);
cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost);
safeCudaFree(d_bvh);
// bvh.printQuadTree();
}

42
src/device/aabb.cu

@ -0,0 +1,42 @@
//
// Created by 14727 on 2022/12/11.
//
#include "../../include/device/aabb.cuh"
#define D_DBL_MAX 1.7976931348623158e+308
#define D_FLT_MAX 3.402823466e+38F
__device__ __host__ AABB::AABB() {
float minNum = -D_FLT_MAX;
float maxNum = D_FLT_MAX;
pMin = FVec3(minNum, minNum, minNum);
pMax = FVec3(maxNum, maxNum, maxNum);
}
__device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {}
__device__ __host__ AABB::AABB(const FVec3 &p1, const FVec3 &p2) {
pMin = FVec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z));
pMax = FVec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z));
}
__device__ __host__ AABB AABB::Union(const FVec3 &p) {
return {FVec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)),
FVec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))};
}
__device__ __host__ AABB AABB::Union(const AABB &b2) {
return {FVec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)),
FVec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))};
}
__device__ __host__ bool AABB::IsOverlap(const AABB& b2) {
if (pMin.x > b2.pMax.x) return false;
if (b2.pMin.x > pMax.x) return false;
if (pMin.y > b2.pMax.y) return false;
if (b2.pMin.y > pMax.y) return false;
if (pMin.z > b2.pMax.z) return false;
if (b2.pMin.z > pMax.z) return false;
return true;
}

2
src/device/DeviceUtils.cu → src/device/device_utils.cu

@ -2,7 +2,7 @@
// Created by 14727 on 2022/12/9.
//
#include "../../include/device/DeviceUtils.cuh"
#include "../../include/device/device_utils.cuh"
__device__ void d_safeFree(float *&p) {
if (p != nullptr) {

13
src/device/vec.cu

@ -0,0 +1,13 @@
//
// Created by 14727 on 2022/12/11.
//
#include "../../include/device/vec.cuh"
__device__ __host__ FVec3::FVec3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
__device__ __host__ FVec3::FVec3(const FVec3 &fVec3) : x(static_cast<float>(fVec3.x)),
y(static_cast<float>(fVec3.y)),
z(static_cast<float>(fVec3.z)) {}
__device__ __host__ FVec3::FVec3():x(0.), y(0.), z(0.) {}

38
src/main.cpp

@ -1,17 +1,17 @@
#include <cstdio>
#include "../include/device/NurbsCurve.cuh"
#include "../include/device/NurbsSurface.cuh"
#include "../include/device/Nurbs/nurbs_curve.cuh"
#include "../include/device/Nurbs/nurbs_surface.cuh"
int main() {
NurbsSurface::Surface nurbsSurfaceEvaluator({
{{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}},
{{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}},
{{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}},
{{-1.5, 3.2, 1, 0.5}, {2.6, 7, -2, 0.7}, {5, 0.8, 4.2, 0.8}, {-4, 1, 4, 0.7}, {2.1, 4, -2, 0.3}, {11, -6, 4, 0.6}},
{{-0.2, 2, 0, 0.7}, {5, 3, 2, 0.4}, {5, 1.5, 1.4, 0.6}, {-3, 2, 5, 0.8}, {0.8, 1.3, 0, 0.5}, {15, -2, 0.9, 0.6}},
{{3, 1.4, -1, 0.4}, {6, 2, 4, 0.6}, {-1, 0, -2, 0.4}, {0, 2.8, 2, 0.6}, {-0.5, 2, 1.2, 0.9}, {7, -3, -2, 0.3}},},
{0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1},
{0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1});
// NurbsSurface::Surface nurbsSurfaceEvaluator({
// {{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}},
// {{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}},
// {{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}},
// {{-1.5, 3.2, 1, 0.5}, {2.6, 7, -2, 0.7}, {5, 0.8, 4.2, 0.8}, {-4, 1, 4, 0.7}, {2.1, 4, -2, 0.3}, {11, -6, 4, 0.6}},
// {{-0.2, 2, 0, 0.7}, {5, 3, 2, 0.4}, {5, 1.5, 1.4, 0.6}, {-3, 2, 5, 0.8}, {0.8, 1.3, 0, 0.5}, {15, -2, 0.9, 0.6}},
// {{3, 1.4, -1, 0.4}, {6, 2, 4, 0.6}, {-1, 0, -2, 0.4}, {0, 2.8, 2, 0.6}, {-0.5, 2, 1.2, 0.9}, {7, -3, -2, 0.3}},},
// {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1},
// {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1});
// NurbsSurface::Evaluator nurbsSurfaceEvaluator({
// {{-1, 0, 0, 1}, {0, 1, 6, 1}, {1, 0, 4, 1}, {2, 0.5, 3, 1}, {3, 3, 1, 1}, {4, -5, 0, 1}},
// {{-2, 1, 1.2, 1}, {1, 2, 3, 1}, {2, 2, 3, 1}, {-1, -0.3, 2, 1}, {-1, 2, 0, 1}, {7, -8, 2, 1}},
@ -21,9 +21,16 @@ int main() {
// {{3, 1.4, -1, 1}, {6, 2, 4, 1}, {-1, 0, -2, 1}, {0, 2.8, 2, 1}, {-0.5, 2, 1.2, 1}, {7, -3, -2, 1}},},
// {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1},
// {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1});
NurbsSurface::Surface nurbsSurfaceEvaluator({
{{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3, -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}},
{{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1}, {0.10, -0.18, 0.03, 1}, {0.49, -0.15, -0.03, 1}},
{{-0.61, 0.22, 0.09, 1}, {-0.21, 0.20, 0.09, 1}, {0.13, 0.20, 0.12, 1}, {0.49, 0.19, -0.03, 1}},
{{-0.52, 0.51, 0.13, 1}, {-0.20, 0.32, 0.36, 1}, {0.18, 0.29, 0.28, 1}, {0.51, 0.53, 0.12, 1}}
}, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
nurbsSurfaceEvaluator.setRecordTime(true);
nurbsSurfaceEvaluator.curvature(101, 101);
nurbsSurfaceEvaluator.buildBHV(17, 17);
// nurbsSurfaceEvaluator.evaluate(101, 101);
printf("==============================\n");
@ -34,15 +41,14 @@ int main() {
{2, 0.5, 3, 0.4},
{3, 3, 1, 0.5},
{4, -5, 0, 0.7}},
{0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1});
{0, 0, 0.1, 0.2, 0.5, 0.8, 1, 1, 1});
nurbsCurveEvaluator.setRecordTime(true);
// nurbsCurveEvaluator.curvature(11);
auto res = nurbsCurveEvaluator.evaluate(11);
for(auto & re : res) {
for (auto &re: res) {
printf("%f, %f, %f\n", re[0], re[1], re[2]);
}
printf("\n");
// nurbsCurveEvaluator.derivative();
return 0;
}

16
src/utils.cpp

@ -38,9 +38,23 @@ void safeCudaFree(float *&p) {
}
}
void safeCudaFree(BVHNode *&p){
if (p != nullptr) {
cudaFree(p);
p = nullptr;
}
}
//template<typename T>
void safeFree(float *&p) {
if (p != nullptr) {
free(p);
p = nullptr;
}
}
}
void safeFree(BVHNode *&p) {
if (p != nullptr) {
free(p);
p = nullptr;
}
}

Loading…
Cancel
Save