From 7896d545c22f913354747f97b627fcf08b562af5 Mon Sep 17 00:00:00 2001 From: Dtouch Date: Fri, 9 Dec 2022 16:16:27 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=96=B0=E8=B0=83=E6=95=B4=E9=A1=B9?= =?UTF-8?q?=E7=9B=AE=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- CMakeLists.txt | 14 +- NurbsBasis.cu | 38 -- NurbsBasis.cuh | 28 -- NurbsEvaluator.cu | 791 -------------------------------- NurbsEvaluator.cuh | 191 -------- README.md | 6 +- include/device/DeviceUtils.cuh | 23 + include/device/NurbsCommon.cuh | 32 ++ include/device/NurbsCurve.cuh | 73 +++ include/device/NurbsSurface.cuh | 85 ++++ include/utils.h | 22 + src/device/DeviceUtils.cu | 26 ++ src/device/NurbsCommon.cu | 76 +++ src/device/NurbsCurve.cu | 283 ++++++++++++ src/device/NurbsSurface.cu | 424 +++++++++++++++++ main.cpp => src/main.cpp | 19 +- src/utils.cpp | 46 ++ utils.cpp | 19 - utils.h | 11 - 20 files changed, 1120 insertions(+), 1090 deletions(-) delete mode 100644 NurbsBasis.cu delete mode 100644 NurbsBasis.cuh delete mode 100644 NurbsEvaluator.cu delete mode 100644 NurbsEvaluator.cuh create mode 100644 include/device/DeviceUtils.cuh create mode 100644 include/device/NurbsCommon.cuh create mode 100644 include/device/NurbsCurve.cuh create mode 100644 include/device/NurbsSurface.cuh create mode 100644 include/utils.h create mode 100644 src/device/DeviceUtils.cu create mode 100644 src/device/NurbsCommon.cu create mode 100644 src/device/NurbsCurve.cu create mode 100644 src/device/NurbsSurface.cu rename main.cpp => src/main.cpp (87%) create mode 100644 src/utils.cpp delete mode 100644 utils.cpp delete mode 100644 utils.h diff --git a/.gitignore b/.gitignore index 7b659fd..6405c3a 100644 --- a/.gitignore +++ b/.gitignore @@ -41,5 +41,4 @@ *.app cmake-build-debug/ -.idea/ - +.idea/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f6c9e4e..de4778f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(NurbsEvaluator LANGUAGES CXX CUDA) set(CMAKE_CUDA_STANDARD 14) -add_executable(NurbsEvaluator main.cpp NurbsEvaluator.cu NurbsEvaluator.cuh utils.cpp utils.h NurbsBasis.cu NurbsBasis.cuh) +add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h src/device/NurbsCommon.cu include/device/NurbsCommon.cuh src/device/DeviceUtils.cu include/device/DeviceUtils.cuh src/device/NurbsSurface.cu include/device/NurbsSurface.cuh src/device/NurbsCurve.cu include/device/NurbsCurve.cuh) #add_compile_options("$<$:/utf-8>") #add_compile_options("$<$:/utf-8>") @@ -14,7 +14,19 @@ add_executable(NurbsEvaluator main.cpp NurbsEvaluator.cu NurbsEvaluator.cuh util #add_library(NurbsEvaluator NurbsEvaluator.cu NurbsEvaluator.cuh utils.cpp utils.h) # 引用系统环境变量CUDA_PATH +# linux +#include_directories("/usr/local/device-11.8/targets/x86_64-linux/include") +# windows include_directories("$ENV{CUDA_PATH}/include") +#MESSAGE("CUDA PATH::: $ENV{CUDA_PATH}") +#MESSAGE("CUDA PATH::: $ENV{LD_LIBRARY_PATH}") +#MESSAGE("CUDA PATH::: $ENV{CPATH}") + set_target_properties(NurbsEvaluator PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + + +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70 75 80) +endif(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) \ No newline at end of file diff --git a/NurbsBasis.cu b/NurbsBasis.cu deleted file mode 100644 index 109224e..0000000 --- a/NurbsBasis.cu +++ /dev/null @@ -1,38 +0,0 @@ -// -// Created by 14727 on 2022/11/19. -// - -#include "NurbsBasis.cuh" - -__device__ void d_basisFunction1(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { - int m = d_knotsCnt - 1; - for (int p = 0; p <= degree; p++) { - for (int i = 0; i + p <= m - 1; i++) { - if (p == 0) { - if ((u > knots[i] || d_floatEqual1(u, knots[i])) && (u < knots[i + 1]) - || - d_floatEqual1(u, knots[i + 1]) && d_floatEqual1(u, knots[m])) { - N_Texture[p * m + i] = 1.0; - } else { - N_Texture[p * m + i] = 0.0; - } - } else { - float Nip_1 = N_Texture[(p - 1) * m + i]; - float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; - float left = d_floatEqual1(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / - (knots[i + p] - knots[i]); - float right = d_floatEqual1(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / - (knots[i + p + 1] - knots[i + 1]); - N_Texture[p * m + i] = left + right; - } - } - } -} - -__device__ bool d_floatEqual1(float a, float b) { - return abs(a - b) < 0.00001; -} - -void NurbsBasis::myPrint11(int a, int b) { - printf("In NurbsBasis %d!!!\n", a * b); -} diff --git a/NurbsBasis.cuh b/NurbsBasis.cuh deleted file mode 100644 index 3051841..0000000 --- a/NurbsBasis.cuh +++ /dev/null @@ -1,28 +0,0 @@ -// -// Created by 14727 on 2022/11/19. -// - -#ifndef NURBSEVALUATOR_NURBSBASIS_CUH -#define NURBSEVALUATOR_NURBSBASIS_CUH - -#include -#include "cstdio" - -/** - * 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 - * @param N_Texture 结果返回在N_Texture中 - */ -extern __device__ void d_basisFunction1(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt); - -/** - * device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 - * @return true:相等 - */ -extern __device__ bool d_floatEqual1(float a, float b); - -namespace NurbsBasis { - void myPrint11(int a, int b); -}; - - -#endif //NURBSEVALUATOR_NURBSBASIS_CUH diff --git a/NurbsEvaluator.cu b/NurbsEvaluator.cu deleted file mode 100644 index da9c233..0000000 --- a/NurbsEvaluator.cu +++ /dev/null @@ -1,791 +0,0 @@ -// -// Created by 14727 on 2022/11/7. -// - -#include - -#include "NurbsEvaluator.cuh" -#include "cstdio" -#include "utils.h" -//#include "NurbsBasis.cuh" - -__device__ void normalization(float &a, float &b, float &c) { - float sumA = a * a; - float sumB = b * b; - float sumC = c * c; - float sum = sumA + sumB + sumC; - a = sqrt(sumA / sum); - b = sqrt(sumB / sum); - c = sqrt(sumC / sum); -} - -//extern __device__ void NurbsBasis::d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {}; - -__host__ NurbsSurface::Evaluator::Evaluator(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v) { - this->knots_u = std::move(knots_u); - this->knots_v = std::move(knots_v); - this->controlPoints = std::move(controlPoints); - recordTime = false; - d_nTexture_u = nullptr; - d_nTexture_v = nullptr; - d_nTexture1_u = nullptr; - d_nTexture1_v = nullptr; - d_knots_u = nullptr; - d_knots_v = nullptr; - d_points = nullptr; - d_derivatives = nullptr; -} - -__host__ std::vector, std::vector>> -NurbsSurface::Evaluator::evaluate(int sampleCnt_u, int sampleCnt_v) { - if (POINT_SIZE != controlPoints[0][0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return {}; - } - - // 构造指向device的controlPoints - const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); - const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); - auto *h_points = (float *) malloc(pointsBytes); - for (int i = 0; i < pointsCnt_u; i++) { - for (int j = 0; j < pointsCnt_v; j++) { - for (int k = 0; k < POINT_SIZE; k++) { - h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; - } - } - } - cudaMalloc((void **) &d_points, pointsBytes); - cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); - - // 构造指向device的knots - const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); - const int knotsBytes_u = knotsCnt_u * sizeof(float), knotsBytes_v = knotsCnt_v * sizeof(float); - auto *h_knots_u = (float *) malloc(knotsBytes_u), *h_knots_v = (float *) malloc(knotsBytes_v); - for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i]; - for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i]; - - cudaMalloc((void **) &d_knots_u, knotsBytes_u); - cudaMalloc((void **) &d_knots_v, knotsBytes_v); - cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice); - cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice); - - // 构造nTexture - cudaMalloc((void **) &d_nTexture_u, - sampleCnt_u * pointsCnt_u * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 - cudaMalloc((void **) &d_nTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); - - // 构造nTexture1 - cudaMalloc((void **) &d_nTexture1_u, sampleCnt_u * (pointsCnt_u + 1) * sizeof(float)); - cudaMalloc((void **) &d_nTexture1_v, sampleCnt_v * (pointsCnt_v + 1) * sizeof(float)); - - // 构造g_basisTexture线程层级 - dim3 blockBasis(512); - dim3 gridBasis_u((sampleCnt_u + blockBasis.x - 1) / blockBasis.x); - dim3 gridBasis_v((sampleCnt_v + blockBasis.x - 1) / blockBasis.x); - - // 构造线程层级,调用核函数 - dim3 block(32, 32); - dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); - // 记录用时 - double time_cost_device; - g_basisTexture<<>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, - sampleCnt_u); - cudaDeviceSynchronize(); - g_basisTexture<<>>(d_nTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, - sampleCnt_v); - cudaDeviceSynchronize(); - - if (recordTime) time_cost_device = utils::get_time_windows(); - g_evaluate <<>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, - knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of surface evaluation for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, - time_cost_device); - } - - // 释放内存 - free(h_points); - free(h_knots_u); - free(h_knots_v); - printf("First derivatives and normal vectors calculated by GPU:\n"); - return {}; -} - - -__host__ std::vector>> -NurbsCurve::Evaluator::evaluate(int sampleCnt) { - - if (POINT_SIZE != controlPoints[0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return {}; - } - // 构造指向device的controlPoints - const int pointsCnt = controlPoints.size(); - const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); - auto *h_points = (float *) malloc(pointsBytes); - for (int i = 0; i < pointsCnt; i++) { - for (int j = 0; j < POINT_SIZE; j++) { - h_points[i * POINT_SIZE + j] = controlPoints[i][j]; - } - } - myCudaFree(d_points); // 注意内存管理 - cudaMalloc((void **) &d_points, pointsBytes); - cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); - - // 构造指向device的knots - const int knotsCnt = knots.size(); - const int knotsBytes = knotsCnt * sizeof(float); - auto *h_knots = (float *) malloc(knotsBytes); - for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; - myCudaFree(d_knots); // 注意内存管理 - cudaMalloc((void **) &d_knots, knotsBytes); - cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); - - // 分配nTexture的内存。只需要GPU内存 -// float *d_nTexture = nullptr; - myCudaFree(d_nTexture); // 注意内存管理 - cudaMalloc((void **) &d_nTexture, - sampleCnt * pointsCnt * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 - - // 分配nTexture1的内存。只需要GPU内存 -// float *d_nTexture1 = nullptr; - myCudaFree(d_nTexture1); // 注意内存管理 - cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); - - // 构造g_basisTexture线程层级 - dim3 blockBasis(512); - dim3 gridBasis((sampleCnt + blockBasis.x - 1) / blockBasis.x); - - // 构造线程层级 - dim3 block(32, 32); - dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); - // 记录用时 - double time_cost_device; - if (recordTime) time_cost_device = utils::get_time_windows(); - printf("there..\n"); - g_basisTexture <<>>(d_nTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); -// cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 - cudaDeviceSynchronize(); - printf("here..\n"); - g_evaluate <<>>(d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); -// g_test<<<1,6>>>(d_nTextureCpy); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device); - } - free(h_points); - free(h_knots); - return {}; -} - -__host__ void NurbsSurface::Evaluator::derivative(int sampleCnt_u, int sampleCnt_v) { - // 先完成evaluation - evaluate(sampleCnt_u, sampleCnt_v); - - if (POINT_SIZE != controlPoints[0][0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return; - } - - float *d_derTexture_u = nullptr; - float *d_derTexture_v = nullptr; - const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); - const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); - cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); - cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); - - // 构造切向量计算结果 - myCudaFree(d_derivatives); - cudaMalloc((void **) &d_derivatives, - sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 - - // 构造线程层级 - dim3 block(32, 32); - dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); - // 构造g_basisTexture线程层级 - dim3 blockTex(512); - dim3 gridTex_u((sampleCnt_u + blockTex.x - 1) / blockTex.x); - dim3 gridTex_v((sampleCnt_v + blockTex.x - 1) / blockTex.x); - // 记录用时 - double time_cost_device; - if (recordTime) time_cost_device = utils::get_time_windows(); - g_derTexture<<>>(d_derTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, - sampleCnt_u); - g_derTexture<<>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, - sampleCnt_v); - cudaDeviceSynchronize(); - g_derivative<<>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, - pointsCnt_u, - pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], - sampleCnt_u, - sampleCnt_v); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, - time_cost_device); - } - cudaFree(d_derTexture_u); - cudaFree(d_derTexture_v); -} - -__host__ void NurbsCurve::Evaluator::derivative(int sampleCnt) { - // 先完成evaluation - evaluate(sampleCnt); - - if (POINT_SIZE != controlPoints[0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return; - } - - float *d_derTexture = nullptr; - const int pointsCnt = controlPoints.size(); - const int knotsCnt = knots.size(); - cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); - - // 构造线程层级 - dim3 block(32, 32); - dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); - // 构造g_basisTexture线程层级 - dim3 blockTex(512); - dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); - - - // 构造切向量计算结果 - myCudaFree(d_derivatives); - cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 - - // 记录用时 - double time_cost_device; - if (recordTime) time_cost_device = utils::get_time_windows(); - g_derTexture<<>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); - cudaDeviceSynchronize(); - g_derivative<<>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, - knots[knotsCnt - 1], sampleCnt); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of curve first derivative calculating for %d samples: %lf\n", sampleCnt, - time_cost_device); - } - cudaFree(d_derTexture); -} - -__host__ void NurbsSurface::Evaluator::curvature(int sampleCnt_u, int sampleCnt_v) { - // 先计算切向量 - derivative(sampleCnt_u, sampleCnt_v); - if (POINT_SIZE != controlPoints[0][0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return; - } - - // 构造线程层级 - dim3 block(32, 32); - dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); - - // 记录用时 - double time_cost_device; - if (recordTime) time_cost_device = utils::get_time_windows(); - g_curvature<<>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], - knots_v[knots_v.size() - 1]); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", - sampleCnt_u * sampleCnt_v, - time_cost_device); - } -} - -__host__ void NurbsCurve::Evaluator::curvature(int sampleCnt) { - // 先计算切向量 - derivative(sampleCnt); - - if (POINT_SIZE != controlPoints[0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return; - } - - // 构造线程层级 - dim3 block(32, 32); - dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); - - // 记录用时 - double time_cost_device; - if (recordTime) time_cost_device = utils::get_time_windows(); - g_curvature<<>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); - cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 - if (recordTime) { - time_cost_device = utils::get_time_windows() - time_cost_device; - printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, - time_cost_device); - } -} - - -//__global__ void -//NurbsSurface::g_evaluate(const float *d_points, const float *d_knots_u, const float *d_knots_v, -// int d_pointsCnt_u, -// int d_pointsCnt_v, int d_POINT_SIZE, int d_knotsCnt_u, int d_knotsCnt_v, -// int d_sampleCnt_u, int d_sampleCnt_v) { -//// printf(" surface calculating... \n"); -// // 二维grid和二维的block -// int ix = int(blockIdx.x * blockDim.x + threadIdx.x); -// int iy = int(blockIdx.y * blockDim.y + threadIdx.y); -// -// float d_paramCeil_u = d_knots_u[d_knotsCnt_u - 1]; -// float d_paramCeil_v = d_knots_v[d_knotsCnt_v - 1]; -// -// float u = ix * d_paramCeil_u / (d_sampleCnt_u - 1); -// float v = iy * d_paramCeil_v / (d_sampleCnt_v - 1); -// -// if (u > 1.0 * d_paramCeil_u || v > 1.0 * d_paramCeil_v) { -// return; -// } -// -// int d_degree_u = d_knotsCnt_u - 1 - d_pointsCnt_u; -// int d_degree_v = d_knotsCnt_v - 1 - d_pointsCnt_v; -// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree -// auto *N_Texture_U = (float *) malloc((d_degree_u + 1) * (d_knotsCnt_u - 1) * sizeof(float)); -// auto *N_Texture_V = (float *) malloc((d_degree_v + 1) * (d_knotsCnt_v - 1) * sizeof(float)); -// d_basisFunction(N_Texture_U, d_knots_u, u, d_degree_u, d_knotsCnt_u); -// d_basisFunction(N_Texture_V, d_knots_v, v, d_degree_v, d_knotsCnt_v); -// float x = 0., y = 0., z = 0.; -// for (int i = 0; i < d_pointsCnt_u; i++) { -// for (int j = 0; j < d_pointsCnt_v; j++) { -// float N_U = N_Texture_U[d_degree_u * (d_knotsCnt_u - 1) + i]; -// float N_V = N_Texture_V[d_degree_v * (d_knotsCnt_v - 1) + j]; -// int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; -// x += N_U * N_V * d_points[idx]; -// y += N_U * N_V * d_points[idx + 1]; -// z += N_U * N_V * d_points[idx + 2]; -// } -// } -// printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 -// free(N_Texture_U); -// free(N_Texture_V); -//} - -__global__ void -NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, - int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, - int d_sampleCnt_v) { - - // 二维grid和二维的block - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; - - float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); - float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); - - if (u > 1.0 * d_lastKnot_u || v > 1.0 * d_lastKnot_v) { - return; - } - - float x = 0., y = 0., z = 0., sumW = 0.; - for (int i = 0; i < d_pointsCnt_u; i++) { - float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; - for (int j = 0; j < d_pointsCnt_v; j++) { - float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; - int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; - float w = d_points[idx + 3]; - x += N_U * N_V * w * d_points[idx]; - y += N_U * N_V * w * d_points[idx + 1]; - z += N_U * N_V * w * d_points[idx + 2]; - sumW += N_U * N_V * w; - } - } - x = x / sumW; - y = y / sumW; - z = z / sumW; -// printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 -} - -__global__ void -NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, - const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, - int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, - int d_sampleCnt_u, int d_sampleCnt_v) { - // 二维grid和二维的block - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; - - if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) { - return; - } - float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); - float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); - - float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; - float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; - - for (int i = 0; i < d_pointsCnt_u; i++) { - for (int j = 0; j < d_pointsCnt_u; j++) { - int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; - float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; - float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; - float wij = d_points[baseIdx + 3]; - nubsPdx_u += factor_u * wij * d_points[baseIdx]; - nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; - nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; - nubsPdw_u += factor_u * wij; - - nubsPdx_v += factor_v * wij * d_points[baseIdx]; - nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; - nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; - nubsPdw_v += factor_v * wij; - } - } - - float x = 0., y = 0., z = 0., w = 0.; - for (int i = 0; i < d_pointsCnt_u; i++) { - float N_U = nTexture_u[ix * d_pointsCnt_u + i]; - for (int j = 0; j < d_pointsCnt_v; j++) { - float N_V = nTexture_v[iy * d_pointsCnt_v + j]; - int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; - float wij = d_points[idx + 3]; - x += N_U * N_V * wij * d_points[idx]; - y += N_U * N_V * wij * d_points[idx + 1]; - z += N_U * N_V * wij * d_points[idx + 2]; - w += N_U * N_V * wij; - } - } - - float w2 = w * w; - float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; - float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; - float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; - - float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; - float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; - float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; -// float pdz_u = (nubsPdz_u * w - z ) - - int baseIdx = (ix * d_sampleCnt_v + iy) * 6; - derivatives[baseIdx] = pdx_u; - derivatives[baseIdx + 1] = pdy_u; - derivatives[baseIdx + 2] = pdz_u; - derivatives[baseIdx + 3] = pdx_v; - derivatives[baseIdx + 4] = pdy_v; - derivatives[baseIdx + 5] = pdz_v; - - float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = - pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 - if((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || (ix == 8 && iy == 10)) - printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, - pdz_v, x_n, y_n, z_n); -} - -__global__ void -NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, - float lastKnot_v) { - // 二维grid和二维的block - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; - - if (ix >= sampleCnt_u || iy >= sampleCnt_v) { - return; - } - - - float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); - float u = ix * step_u, v = iy * step_v; - - int baseIdx = (ix * sampleCnt_v + iy) * 6; - int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; - int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; - -// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], -// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); - - float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导 - float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; - - if (ix == 0) { - sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; - sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; - sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; - - sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; - sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; - sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; - } else if (ix == sampleCnt_u - 1) { - sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; - sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; - sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; - - sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; - sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; - sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; - } else { - sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); - sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); - sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); - - sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); - sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); - sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); - } - - if (iy == 0) { - sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; - sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; - sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; - - sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; - sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; - sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; - } else if (iy == sampleCnt_v - 1) { - sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; - sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; - sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; - - sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; - sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; - sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; - } else { - sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); - sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); - sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); - - sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); - sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); - sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); - } - - float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; - normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); - normalization(uvx, uvy, uvz); - normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); - - - if(ix == 8 && iy == 9) - printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, sndPdz_uu, - uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); -} - -__global__ void -NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d_pointsCnt, - const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { -// printf(" curve calculating... \n"); - // 二维grid和一维的block -// int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - // 二维block和一维grid - int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - float u = idx * d_lastKnot / (d_sampleCnt - 1); - if (u > 1.0 * d_lastKnot) { - return; - } -// -// int d_degree = d_knotsCnt - 1 - d_pointsCnt; -// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree -// auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); -// d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); - float x = 0., y = 0., z = 0., sumW = 0.; - for (int i = 0; i < d_pointsCnt; i++) { - float N = NTexture[idx * d_pointsCnt + i]; - int baseIdx = i * d_POINT_SIZE; - float w = d_points[baseIdx + 3]; - x += N * w * d_points[baseIdx]; - y += N * w * d_points[baseIdx + 1]; - z += N * w * d_points[baseIdx + 2]; - sumW += N * w; - } - x = x / sumW; - y = y / sumW; - z = z / sumW; - printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 -} - -__global__ void -NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, - int d_pointsCnt, int d_POINT_SIZE, - float d_lastKnot, int d_sampleCnt) { - // 二维block和一维grid - int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - if (idx >= d_sampleCnt) return; - float u = idx * d_lastKnot / (d_sampleCnt - 1); - float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; -// printf("POINT_SIZE: %d\n", d_POINT_SIZE); - - for (int i = 0; i < d_pointsCnt; i++) { - int baseIdx = i * d_POINT_SIZE; - float nFactor = derTexture[idx * d_pointsCnt + i]; - float wi = d_points[baseIdx + 3]; - nubs_dx += nFactor * wi * d_points[baseIdx]; - nubs_dy += nFactor * wi * d_points[baseIdx + 1]; - nubs_dz += nFactor * wi * d_points[baseIdx + 2]; - nubs_dw += nFactor * wi; -// printf("(x, y, z): (%g, %g, %g)\n", d_points[baseIdx], d_points[baseIdx + 1], d_points[baseIdx + 2]); - } - - float x = 0., y = 0., z = 0., w = 0.; - for (int i = 0; i < d_pointsCnt; i++) { - float N = nTexture[idx * d_pointsCnt + i]; - int baseIdx = i * d_POINT_SIZE; - float wi = d_points[baseIdx + 3]; - x += N * wi * d_points[baseIdx]; - y += N * wi * d_points[baseIdx + 1]; - z += N * wi * d_points[baseIdx + 2]; - w += N * wi; - } - float dx = (nubs_dx * w - x * nubs_dw) / (w * w); - float dy = (nubs_dy * w - y * nubs_dw) / (w * w); - float dz = (nubs_dz * w - z * nubs_dw) / (w * w); - int baseIdx = idx * 3; - derivatives[baseIdx] = dx; - derivatives[baseIdx + 1] = dy; - derivatives[baseIdx + 2] = dz; - printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); -} - -__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { - // 二维block和一维grid - int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - if (idx >= sampleCnt) return; - float step = lastKnot / (sampleCnt - 1); - float u = idx * step; - float sndPdx, sndPdy, sndPdz; // 二阶导 - int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; - if (idx == 0) { - sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; - sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; - sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; - } else if (idx == sampleCnt - 1) { - sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; - sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; - sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; - } else { - sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); - sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); - sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); - } - printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); -} - -__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, - int d_sampleCnt) { - // 一维grid和一维block - int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 - float d_paramCeil = d_knots[d_knotsCnt - 1]; - float u = idx * d_paramCeil / (d_sampleCnt - 1); - if (u > 1.0 * d_paramCeil) { - return; - } - int d_degree = d_knotsCnt - 1 - d_pointsCnt; - auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); - d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); - for (int i = 0; i < d_pointsCnt; i++) { - nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i]; - nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i]; -// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); - } - nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + - d_pointsCnt]; // nTexture1多记录一列数据 - free(N_dp); -} - - -__global__ void -g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, - int d_sampleCnt) { - // 一维grid和一维block - int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 - if (idx >= d_sampleCnt) { - return; - } - int degree = d_knotsCnt - 1 - d_pointsCnt; - -// printf("degree: %d\n", degree); - for (int i = 0; i < d_pointsCnt; i++) { - float left = d_floatEqual(d_knots[i + degree], d_knots[i]) ? 0 : - nTexture1[idx * (d_pointsCnt + 1) + i] * (degree - 1) / (d_knots[i + degree] - d_knots[i]); - float right = d_floatEqual(d_knots[i + degree + 1], d_knots[i + 1]) ? 0 : - nTexture1[idx * (d_pointsCnt + 1) + i + 1] * (degree - 1) / - (d_knots[i + degree + 1] - d_knots[i + 1]); - derTexture[idx * d_pointsCnt + i] = left - right; -// printf("<%d, %d> -- %g \n", idx, i, left - right); -// printf("nTex1: %g \n", nTexture1[idx * (d_pointsCnt + 1) + i]); - } -} - -__host__ NurbsCurve::Evaluator::Evaluator(std::vector> controlPoints, - std::vector knots) { - this->knots = std::move(knots); - this->controlPoints = std::move(controlPoints); - recordTime = false; - d_nTexture = nullptr; - d_nTexture1 = nullptr; - d_points = nullptr; - d_knots = nullptr; - d_derivatives = nullptr; -} - - -__device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { - int m = d_knotsCnt - 1; - for (int p = 0; p <= degree; p++) { - for (int i = 0; i + p <= m - 1; i++) { - if (p == 0) { - if ((u > knots[i] || d_floatEqual(u, knots[i])) && (u < knots[i + 1]) - || - d_floatEqual(u, knots[i + 1]) && d_floatEqual(u, knots[m])) { - N_Texture[p * m + i] = 1.0; - } else { - N_Texture[p * m + i] = 0.0; - } - } else { - float Nip_1 = N_Texture[(p - 1) * m + i]; - float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; - float left = d_floatEqual(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / - (knots[i + p] - knots[i]); - float right = d_floatEqual(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / - (knots[i + p + 1] - knots[i + 1]); - N_Texture[p * m + i] = left + right; - } - } - } -} - -__device__ bool d_floatEqual(float a, float b) { - return abs(a - b) < 0.00001; -} - -__host__ void myCudaFree(float *&p) { - if (p != nullptr) { - cudaFree(p); - p = nullptr; - } -} - - -void NurbsCurve::Evaluator::setRecordTime(bool r) { - recordTime = r; -} - -void NurbsSurface::Evaluator::setRecordTime(bool r) { - recordTime = r; -} - -NurbsSurface::Evaluator::~Evaluator() { - myCudaFree(d_nTexture_u); - myCudaFree(d_nTexture_v); - myCudaFree(d_nTexture1_u); - myCudaFree(d_nTexture1_v); - myCudaFree(d_points); - myCudaFree(d_knots_u); - myCudaFree(d_knots_v); - cudaDeviceReset(); -} - -NurbsCurve::Evaluator::~Evaluator() { - myCudaFree(d_nTexture); - myCudaFree(d_nTexture1); - myCudaFree(d_points); - myCudaFree(d_knots); - cudaDeviceReset(); -} \ No newline at end of file diff --git a/NurbsEvaluator.cuh b/NurbsEvaluator.cuh deleted file mode 100644 index 3a0ca94..0000000 --- a/NurbsEvaluator.cuh +++ /dev/null @@ -1,191 +0,0 @@ -#ifndef UNTITLED1_NURBSEVALUATOR_CUH -#define UNTITLED1_NURBSEVALUATOR_CUH - -#include -#include -#include -#include - -const int POINT_SIZE = 4; - -/** - * 保证释放后的指针指向空。这样一来保证指针不乱指,free的时候不会出错、二来可以判断指针是否已经free - * 注意指针是引用传参,因为要把指针本身置空 - */ -__host__ void myCudaFree(float *&p); - -namespace NurbsSurface { - /** - * 曲线计算的核函数 - * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) - */ - __global__ static void - g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, - int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, - int d_sampleCnt_v); - - __global__ static void - g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, - const float *nTexture_v, - const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, - float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); - - __global__ static void - g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); - - class Evaluator { - private: - std::vector>> controlPoints; - float *d_points; - std::vector knots_u; - std::vector knots_v; - float *d_knots_u; - float *d_knots_v; - bool recordTime; - - float *d_nTexture_u; // u方向指向度为p时的device中的nurbs基函数矩阵 - float *d_nTexture_v; // v方向指向度为p时的device中的nurbs基函数矩阵 - float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 - float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 - - float *d_derivatives; // 一阶导计算结果 - -// int sampleCnt_u; -// int sampleCnt_v; - - public: - /** - * 构造函数 - * @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3] - * @param knots_u u方向knots - * @param knots_v v方向knots - */ - __host__ explicit Evaluator(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v); - - /** - * 供外部CPU程序使用的、负责调用gpu并行计算的方法 - * @param sampleCnt_u u方向采样数目 - * @param sampleCnt_v v方向采样数目 - * @return 由 map 组成的vector{<, {x, y, z}>} - */ - __host__ std::vector, std::vector>> - evaluate(int sampleCnt_u_, int sampleCnt_v_); - - /** - * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 - */ - __host__ void derivative(int sampleCnt_u, int sampleCnt_v); - - /** - * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 - */ - __host__ void curvature(int sampleCnt_u, int sampleCnt_v); - - void setRecordTime(bool r); - - ~Evaluator(); - - }; -} - -/** - * 曲线部分 - */ -namespace NurbsCurve { - - __global__ void g_test(float *nTexture); - - /** - * 曲线计算的核函数 - * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) - */ - __global__ static void - g_evaluate(const float *NTexture, const float *d_points, int d_pointsCnt, int d_pointSize, - float d_lastKnot, int d_sampleCnt); - - __global__ static void - g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, - int d_pointsCnt, int d_pointSize, float d_lastKnot, - int d_sampleCnt); - - __global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot); - - class Evaluator { - private: - std::vector> controlPoints; - std::vector knots; - float *d_knots; - float *d_points; - bool recordTime; - - float *d_nTexture; // 指向度为p时的device中的nurbs基函数矩阵 - float *d_nTexture1; // 指向度为p-1时的device中的nurbs基函数矩阵 - - float *d_derivatives{}; // 一阶导计算结果 - - public: - /** - * 构造函数 - * @param controlPoints 控制点矩阵[pointsCnt][3] - */ - __host__ explicit Evaluator(std::vector> controlPoints, std::vector knots); - - /** - * 供外部CPU程序使用的、负责调用gpu并行进行evaluation的方法 - * @param sampleCnt_ 在参数域内均匀采样的采样数,它会更新成员变量中的sampleCnt - * @return 由 map 组成的vector{} - */ - __host__ std::vector>> evaluate(int sampleCnt_); - - /** - * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 - */ - __host__ void derivative(int sampleCnt); - - /** - * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 - */ - __host__ void curvature(int sampleCnt); - - __host__ ~Evaluator(); - - void setRecordTime(bool r); - }; -} - - - - - -/** - * 计算并保存基函数值 - * @param nTexture 记录度数为p的基函数值,规模为【sampleCnt,pointsCnt】 - * @param nTexture1 记录度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 - */ -__global__ static void -g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, - int d_sampleCnt); - -/** - * 计算并保存基函数对采样点切向量的分量值 - * @param derTexture 记录度数为p的Nurbs基函数对采样点切向量的分量值,大小为【sampleCnt,pointsCnt】 - * @param nTexture1 度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 - */ -__global__ static void -g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, - int d_sampleCnt); - -/** - * 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 - * @param N_Texture 结果返回在N_Texture中 - */ -__device__ void d_basisFunction(float *nTexture, const float *knots, float u, int degree, int d_knotsCnt); - -/** - * device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 - * @return true:相等 - */ -__device__ bool d_floatEqual(float a, float b); - -#endif \ No newline at end of file diff --git a/README.md b/README.md index dbdc060..c11bfef 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,15 @@ A tool for evaluating multiple NURBS curve/surface points using the GPU. 根据CMake构建项目,运行main文件即可生成可执行文件 ### 作为依赖使用 1. CMakeLists.txt中注释以下代码,不再生成可执行文件 + ```cmake -add_executable(NurbsEvaluator main.cpp NurbsEvaluator.cu NurbsEvaluator.cuh utils.cpp utils.h) +add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h) ``` 2. CMakeLists.txt中取消注释以下代码,表示需要生成静态库。构建项目。 + ```cmake set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -add_library(NurbsEvaluator NurbsEvaluator.cu NurbsEvaluator.cuh utils.cpp utils.h) +add_library(NurbsEvaluator src/utils.cpp include/utils.h) ``` 3. 原项目连接到生成的静态库目录。如果是由CMake构建,可以用 target_link_libraries 指向依赖(.lib),用 target_include_directories 指向头文件目录。 4. 使用.cu文件调用NurbsEvaluator diff --git a/include/device/DeviceUtils.cuh b/include/device/DeviceUtils.cuh new file mode 100644 index 0000000..5ea9fa6 --- /dev/null +++ b/include/device/DeviceUtils.cuh @@ -0,0 +1,23 @@ +// +// Created by 14727 on 2022/12/9. +// + +#ifndef NURBSEVALUATOR_DEVICEUTILS_CUH +#define NURBSEVALUATOR_DEVICEUTILS_CUH + + +__device__ void d_safeFree(float * &p); + +/** + * device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 + * @return true:相等 + */ +__device__ bool d_floatEqual(float a, float b); + +/** + * 正则归一化,一般用于方向向量的归一化 + */ +__device__ void normalization(float &a, float &b, float &c); + + +#endif //NURBSEVALUATOR_DEVICEUTILS_CUH diff --git a/include/device/NurbsCommon.cuh b/include/device/NurbsCommon.cuh new file mode 100644 index 0000000..dd3bd2a --- /dev/null +++ b/include/device/NurbsCommon.cuh @@ -0,0 +1,32 @@ +// +// Created by 14727 on 2022/11/19. +// + +#ifndef NURBSEVALUATOR_NURBSCOMMON_CUH +#define NURBSEVALUATOR_NURBSCOMMON_CUH + +/** + * 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 + * @param N_Texture 结果返回在N_Texture中 + */ +__device__ void d_basisFunction(float *nTexture, const float *knots, float u, int degree, int d_knotsCnt); + +/** + * 计算并保存基函数值 + * @param nTexture 记录度数为p的基函数值,规模为【sampleCnt,pointsCnt】 + * @param nTexture1 记录度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 + */ +__global__ void +g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, + int d_sampleCnt); + +/** + * 计算并保存基函数对采样点切向量的分量值 + * @param derTexture 记录度数为p的Nurbs基函数对采样点切向量的分量值,大小为【sampleCnt,pointsCnt】 + * @param nTexture1 度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 + */ +__global__ void +g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, + int d_sampleCnt); + +#endif //NURBSEVALUATOR_NURBSCOMMON_CUH diff --git a/include/device/NurbsCurve.cuh b/include/device/NurbsCurve.cuh new file mode 100644 index 0000000..17e105e --- /dev/null +++ b/include/device/NurbsCurve.cuh @@ -0,0 +1,73 @@ +// +// Created by 14727 on 2022/12/9. +// + +#ifndef NURBSEVALUATOR_NURBSCURVE_CUH +#define NURBSEVALUATOR_NURBSCURVE_CUH + +#include +#include +//#include + +namespace NurbsCurve { + const int POINT_SIZE = 4; + /** + * 曲线计算的核函数 + * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) + */ + __global__ static void + g_evaluate(float *res, const float *NTexture, const float *d_points, int d_pointsCnt, int d_pointSize, + float d_lastKnot, int d_sampleCnt); + + __global__ static void + g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, + int d_pointsCnt, int d_pointSize, float d_lastKnot, + int d_sampleCnt); + + __global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot); + + class Curve { + private: + std::vector> controlPoints; + std::vector knots; + float *d_knots; + float *d_points; + bool recordTime; + + float *d_nTexture; // 指向度为p时的device中的nurbs基函数矩阵 + float *d_nTexture1; // 指向度为p-1时的device中的nurbs基函数矩阵 + + float *d_derivatives{}; // 一阶导计算结果 + + public: + /** + * 构造函数 + * @param controlPoints 控制点矩阵[pointsCnt][3] + */ + explicit Curve(std::vector> controlPoints, std::vector knots); + + /** + * 供外部CPU程序使用的、负责调用gpu并行进行evaluation的方法 + * @param sampleCnt_ 在参数域内均匀采样的采样数,它会更新成员变量中的sampleCnt + * @return 由 map 组成的vector{} + */ + std::vector> evaluate(int sampleCnt_); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 + */ + void derivative(int sampleCnt); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 + */ + void curvature(int sampleCnt); + + ~Curve(); + + void setRecordTime(bool r); + }; +} + + +#endif //NURBSEVALUATOR_NURBSCURVE_CUH diff --git a/include/device/NurbsSurface.cuh b/include/device/NurbsSurface.cuh new file mode 100644 index 0000000..feb2f21 --- /dev/null +++ b/include/device/NurbsSurface.cuh @@ -0,0 +1,85 @@ +// +// Created by 14727 on 2022/12/9. +// + +#ifndef NURBSEVALUATOR_NURBSSURFACE_CUH +#define NURBSEVALUATOR_NURBSSURFACE_CUH +#include +#include "cuda_runtime.h" + +namespace NurbsSurface { + const int POINT_SIZE = 4; + /** + * 曲线计算的核函数 + * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) + */ + __global__ static void + g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, + int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, + int d_sampleCnt_v); + + __global__ static void + g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, + const float *nTexture_v, + const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, + float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); + + __global__ static void + g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); + + class Surface { + private: + std::vector>> controlPoints; + float *d_points; + std::vector knots_u; + std::vector knots_v; + float *d_knots_u; + float *d_knots_v; + bool recordTime; + + float *d_nTexture_u; // u方向指向度为p时的device中的nurbs基函数矩阵 + float *d_nTexture_v; // v方向指向度为p时的device中的nurbs基函数矩阵 + float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 + float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 + + float *d_derivatives; // 一阶导计算结果 + + public: + /** + * 构造函数 + * @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3] + * @param knots_u u方向knots + * @param knots_v v方向knots + */ + __host__ explicit Surface(std::vector>> controlPoints, + std::vector knots_u, std::vector knots_v); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算的方法 + * @param sampleCnt_u u方向采样数目 + * @param sampleCnt_v v方向采样数目 + * @return 由 map 组成的vector{<, {x, y, z}>} + */ + __host__ std::vector>> + evaluate(int sampleCnt_u_, int sampleCnt_v_); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 + */ + __host__ void derivative(int sampleCnt_u, int sampleCnt_v); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 + */ + __host__ void curvature(int sampleCnt_u, int sampleCnt_v); + + void setRecordTime(bool r); + + ~Surface(); + + }; +} + + + +#endif //NURBSEVALUATOR_NURBSSURFACE_CUH \ No newline at end of file diff --git a/include/utils.h b/include/utils.h new file mode 100644 index 0000000..c9c24dc --- /dev/null +++ b/include/utils.h @@ -0,0 +1,22 @@ +#ifndef UNTITLED1_UTILS_H +#define UNTITLED1_UTILS_H + +#define IN_UNIX 0 // 确定当前运行的操作系统(需要通过系统调用获得时间) + +#if IN_UNIX +#include +#include +double get_time(); +#else +#include +double get_time(); +#endif + +/** + * 保证释放后的指针指向空。这样一来保证指针不乱指,free的时候不会出错、二来可以判断指针是否已经free + * 注意指针是引用传参,因为要把指针本身置空 + */ +void safeCudaFree(float *&p); +void safeFree(float *&p); + +#endif //UNTITLED1_UTILS_H diff --git a/src/device/DeviceUtils.cu b/src/device/DeviceUtils.cu new file mode 100644 index 0000000..06f75ae --- /dev/null +++ b/src/device/DeviceUtils.cu @@ -0,0 +1,26 @@ +// +// Created by 14727 on 2022/12/9. +// + +#include "../../include/device/DeviceUtils.cuh" + +__device__ void d_safeFree(float *&p) { + if (p != nullptr) { + free(p); + p = nullptr; + } +} + +__device__ bool d_floatEqual(float a, float b) { + return abs(a - b) < 0.00001; +} + +__device__ void normalization(float &a, float &b, float &c) { + float sumA = a * a; + float sumB = b * b; + float sumC = c * c; + float sum = sumA + sumB + sumC; + a = sqrt(sumA / sum); + b = sqrt(sumB / sum); + c = sqrt(sumC / sum); +} \ No newline at end of file diff --git a/src/device/NurbsCommon.cu b/src/device/NurbsCommon.cu new file mode 100644 index 0000000..6c15ee1 --- /dev/null +++ b/src/device/NurbsCommon.cu @@ -0,0 +1,76 @@ +// +// Created by 14727 on 2022/11/19. +// + +#include "../../include/device/NurbsCommon.cuh" +#include "../../include/device/DeviceUtils.cuh" + +__device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { + int m = d_knotsCnt - 1; + for (int p = 0; p <= degree; p++) { + for (int i = 0; i + p <= m - 1; i++) { + if (p == 0) { + if ((u > knots[i] || d_floatEqual(u, knots[i])) && (u < knots[i + 1]) + || + d_floatEqual(u, knots[i + 1]) && d_floatEqual(u, knots[m])) { + N_Texture[p * m + i] = 1.0; + } else { + N_Texture[p * m + i] = 0.0; + } + } else { + float Nip_1 = N_Texture[(p - 1) * m + i]; + float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; + float left = d_floatEqual(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / + (knots[i + p] - knots[i]); + float right = d_floatEqual(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / + (knots[i + p + 1] - knots[i + 1]); + N_Texture[p * m + i] = left + right; + } + } + } +} + +__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, + int d_sampleCnt) { + // 一维grid和一维block + int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 + float d_paramCeil = d_knots[d_knotsCnt - 1]; + float u = idx * d_paramCeil / (d_sampleCnt - 1); + if (u > 1.0 * d_paramCeil) { + return; + } + int d_degree = d_knotsCnt - 1 - d_pointsCnt; + auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); + d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); + for (int i = 0; i < d_pointsCnt; i++) { + nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i]; + nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i]; +// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); + } + nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + + d_pointsCnt]; // nTexture1多记录一列数据 + d_safeFree(N_dp); +} + +__global__ void +g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, + int d_sampleCnt) { + // 一维grid和一维block + int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 + if (idx >= d_sampleCnt) { + return; + } + int degree = d_knotsCnt - 1 - d_pointsCnt; + +// printf("degree: %d\n", degree); + for (int i = 0; i < d_pointsCnt; i++) { + float left = d_floatEqual(d_knots[i + degree], d_knots[i]) ? 0 : + nTexture1[idx * (d_pointsCnt + 1) + i] * (degree - 1) / (d_knots[i + degree] - d_knots[i]); + float right = d_floatEqual(d_knots[i + degree + 1], d_knots[i + 1]) ? 0 : + nTexture1[idx * (d_pointsCnt + 1) + i + 1] * (degree - 1) / + (d_knots[i + degree + 1] - d_knots[i + 1]); + derTexture[idx * d_pointsCnt + i] = left - right; +// printf("<%d, %d> -- %g \n", idx, i, left - right); +// printf("nTex1: %g \n", nTexture1[idx * (d_pointsCnt + 1) + i]); + } +} \ No newline at end of file diff --git a/src/device/NurbsCurve.cu b/src/device/NurbsCurve.cu new file mode 100644 index 0000000..71c92df --- /dev/null +++ b/src/device/NurbsCurve.cu @@ -0,0 +1,283 @@ +// +// Created by 14727 on 2022/12/9. +// +#include "../../include/device/NurbsCommon.cuh" +#include "../../include/device/NurbsCurve.cuh" +#include "../../include/utils.h" + +__global__ void +NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt, + const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { +// printf(" curve calculating... \n"); + // 二维grid和一维的block +// int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + // 二维block和一维grid + int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + float u = idx * d_lastKnot / (d_sampleCnt - 1); + if (u > 1.0 * d_lastKnot) { + return; + } +// +// int d_degree = d_knotsCnt - 1 - d_pointsCnt; +// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree +// auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); +// d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); + float x = 0., y = 0., z = 0., sumW = 0.; + for (int i = 0; i < d_pointsCnt; i++) { + float N = NTexture[idx * d_pointsCnt + i]; + int baseIdx = i * d_POINT_SIZE; + float w = d_points[baseIdx + 3]; + x += N * w * d_points[baseIdx]; + y += N * w * d_points[baseIdx + 1]; + z += N * w * d_points[baseIdx + 2]; + sumW += N * w; + } + x = x / sumW; + y = y / sumW; + z = z / sumW; + int baseIdx = idx * 3; // 这里的结果点大小应该固定为3 + res[baseIdx] = x; + res[baseIdx + 1] = y; + res[baseIdx + 2] = z; + printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 +} + +__global__ void +NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, + int d_pointsCnt, int d_POINT_SIZE, + float d_lastKnot, int d_sampleCnt) { + // 二维block和一维grid + int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + if (idx >= d_sampleCnt) return; + float u = idx * d_lastKnot / (d_sampleCnt - 1); + float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; + + for (int i = 0; i < d_pointsCnt; i++) { + int baseIdx = i * d_POINT_SIZE; + float nFactor = derTexture[idx * d_pointsCnt + i]; + float wi = d_points[baseIdx + 3]; + nubs_dx += nFactor * wi * d_points[baseIdx]; + nubs_dy += nFactor * wi * d_points[baseIdx + 1]; + nubs_dz += nFactor * wi * d_points[baseIdx + 2]; + nubs_dw += nFactor * wi; + } + + float x = 0., y = 0., z = 0., w = 0.; + for (int i = 0; i < d_pointsCnt; i++) { + float N = nTexture[idx * d_pointsCnt + i]; + int baseIdx = i * d_POINT_SIZE; + float wi = d_points[baseIdx + 3]; + x += N * wi * d_points[baseIdx]; + y += N * wi * d_points[baseIdx + 1]; + z += N * wi * d_points[baseIdx + 2]; + w += N * wi; + } + float dx = (nubs_dx * w - x * nubs_dw) / (w * w); + float dy = (nubs_dy * w - y * nubs_dw) / (w * w); + float dz = (nubs_dz * w - z * nubs_dw) / (w * w); + int baseIdx = idx * 3; + derivatives[baseIdx] = dx; + derivatives[baseIdx + 1] = dy; + derivatives[baseIdx + 2] = dz; + printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); +} + +__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { + // 二维block和一维grid + int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + if (idx >= sampleCnt) return; + float step = lastKnot / (sampleCnt - 1); + float u = idx * step; + float sndPdx, sndPdy, sndPdz; // 二阶导 + int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; + if (idx == 0) { + sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; + sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; + sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; + } else if (idx == sampleCnt - 1) { + sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; + sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; + sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; + } else { + sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); + sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); + sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); + } + printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); +} + +NurbsCurve::Curve::Curve(std::vector> controlPoints, + std::vector knots) { + this->knots = std::move(knots); + this->controlPoints = std::move(controlPoints); + recordTime = false; + d_nTexture = nullptr; + d_nTexture1 = nullptr; + d_points = nullptr; + d_knots = nullptr; + d_derivatives = nullptr; +} + +void NurbsCurve::Curve::setRecordTime(bool r) { + recordTime = r; +} + +NurbsCurve::Curve::~Curve() { + safeCudaFree(d_nTexture); + safeCudaFree(d_nTexture1); + safeCudaFree(d_points); + safeCudaFree(d_knots); + cudaDeviceReset(); +} + +std::vector> +NurbsCurve::Curve::evaluate(int sampleCnt) { + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return {}; + } + // 构造指向device的controlPoints + const int pointsCnt = controlPoints.size(); + const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); + auto *h_points = (float *) malloc(pointsBytes); + for (int i = 0; i < pointsCnt; i++) { + for (int j = 0; j < POINT_SIZE; j++) { + h_points[i * POINT_SIZE + j] = controlPoints[i][j]; + } + } + safeCudaFree(d_points); // 注意内存管理 + cudaMalloc((void **) &d_points, pointsBytes); + cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); + + // 构造指向device的knots + const int knotsCnt = knots.size(); + const int knotsBytes = knotsCnt * sizeof(float); + auto *h_knots = (float *) malloc(knotsBytes); + for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; + safeCudaFree(d_knots); // 注意内存管理 + cudaMalloc((void **) &d_knots, knotsBytes); + cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); + + // 分配nTexture的内存。只需要GPU内存 + safeCudaFree(d_nTexture); // 注意内存管理 + cudaMalloc((void **) &d_nTexture, + sampleCnt * pointsCnt * sizeof(float)); + + // 分配nTexture1的内存。只需要GPU内存 + safeCudaFree(d_nTexture1); // 注意内存管理 + cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 + + // 结果数组 + size_t resBytes = sampleCnt * 3 * sizeof(float); + float *d_res; + cudaMalloc((void **)&d_res, resBytes); + auto *h_res = (float *)malloc(resBytes); + + + // 构造g_basisTexture线程层级 + dim3 blockBasis(512); + dim3 gridBasis((sampleCnt + blockBasis.x - 1) / blockBasis.x); + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + printf("there..\n"); + g_basisTexture <<>>(d_nTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); +// cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 + cudaDeviceSynchronize(); + printf("here..\n"); + g_evaluate <<>>(d_res, d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); +// g_test<<<1,6>>>(d_nTextureCpy); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device); + } + + cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); + std::vector> res(sampleCnt, std::vector(3, 0.)); + for(int i = 0; i < sampleCnt; i++) { + int baseIdx = i * 3; + res[i][0] = h_res[baseIdx]; + res[i][1] = h_res[baseIdx + 1]; + res[i][2] = h_res[baseIdx + 2]; + } + + safeFree(h_points); + safeFree(h_knots); + safeCudaFree(d_res); + safeFree(h_res); + return res; +} + +void NurbsCurve::Curve::derivative(int sampleCnt) { + // 先完成evaluation + evaluate(sampleCnt); + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + float *d_derTexture = nullptr; + const int pointsCnt = controlPoints.size(); + const int knotsCnt = knots.size(); + cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); + // 构造g_basisTexture线程层级 + dim3 blockTex(512); + dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); + + + // 构造切向量计算结果 + safeCudaFree(d_derivatives); + cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 + + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + g_derTexture<<>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); + cudaDeviceSynchronize(); + g_derivative<<>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, + knots[knotsCnt - 1], sampleCnt); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of curve first derivative calculating for %d samples: %lf\n", sampleCnt, + time_cost_device); + } + cudaFree(d_derTexture); +} + +void NurbsCurve::Curve::curvature(int sampleCnt) { + // 先计算切向量 + derivative(sampleCnt); + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); + + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + g_curvature<<>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, + time_cost_device); + } +} + diff --git a/src/device/NurbsSurface.cu b/src/device/NurbsSurface.cu new file mode 100644 index 0000000..d39b8ad --- /dev/null +++ b/src/device/NurbsSurface.cu @@ -0,0 +1,424 @@ +// +// Created by 14727 on 2022/12/9. +// + +#include "../../include/device/NurbsSurface.cuh" +#include "../../include/device/NurbsCommon.cuh" +#include "../../include/device/DeviceUtils.cuh" +#include "../../include/utils.h" + +__global__ void +NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, + int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, + int d_sampleCnt_v) { + + // 二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); + float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); + + if (u > 1.0 * d_lastKnot_u || v > 1.0 * d_lastKnot_v) { + return; + } + + float x = 0., y = 0., z = 0., sumW = 0.; + for (int i = 0; i < d_pointsCnt_u; i++) { + float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; + for (int j = 0; j < d_pointsCnt_v; j++) { + float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; + int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; + float w = d_points[idx + 3]; + x += N_U * N_V * w * d_points[idx]; + y += N_U * N_V * w * d_points[idx + 1]; + z += N_U * N_V * w * d_points[idx + 2]; + sumW += N_U * N_V * w; + } + } + x = x / sumW; + y = y / sumW; + z = z / sumW; + +// int baseIdx = (ix * d_sampleCnt_v + iy) * 3; +// res[baseIdx] = x; +// res[baseIdx + 1] = y; +// res[baseIdx + 2] = z; + +// printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, x, y, z); // %g输出,舍弃无意义的0 +} + +__global__ void +NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, + const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, + int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, + int d_sampleCnt_u, int d_sampleCnt_v) { + // 二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) { + return; + } + float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); + float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); + + float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; + float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; + + for (int i = 0; i < d_pointsCnt_u; i++) { + for (int j = 0; j < d_pointsCnt_u; j++) { + int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; + float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; + float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; + float wij = d_points[baseIdx + 3]; + nubsPdx_u += factor_u * wij * d_points[baseIdx]; + nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; + nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; + nubsPdw_u += factor_u * wij; + + nubsPdx_v += factor_v * wij * d_points[baseIdx]; + nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; + nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; + nubsPdw_v += factor_v * wij; + } + } + + float x = 0., y = 0., z = 0., w = 0.; + for (int i = 0; i < d_pointsCnt_u; i++) { + float N_U = nTexture_u[ix * d_pointsCnt_u + i]; + for (int j = 0; j < d_pointsCnt_v; j++) { + float N_V = nTexture_v[iy * d_pointsCnt_v + j]; + int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; + float wij = d_points[idx + 3]; + x += N_U * N_V * wij * d_points[idx]; + y += N_U * N_V * wij * d_points[idx + 1]; + z += N_U * N_V * wij * d_points[idx + 2]; + w += N_U * N_V * wij; + } + } + + float w2 = w * w; + float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; + float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; + float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; + + float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; + float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; + float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; +// float pdz_u = (nubsPdz_u * w - z ) + + int baseIdx = (ix * d_sampleCnt_v + iy) * 6; + derivatives[baseIdx] = pdx_u; + derivatives[baseIdx + 1] = pdy_u; + derivatives[baseIdx + 2] = pdz_u; + derivatives[baseIdx + 3] = pdx_v; + derivatives[baseIdx + 4] = pdy_v; + derivatives[baseIdx + 5] = pdz_v; + + float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = + pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 + if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || + (ix == 8 && iy == 10)) + printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, + pdz_v, x_n, y_n, z_n); +} + +__global__ void +NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, + float lastKnot_v) { + // 二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + if (ix >= sampleCnt_u || iy >= sampleCnt_v) { + return; + } + + + float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); + float u = ix * step_u, v = iy * step_v; + + int baseIdx = (ix * sampleCnt_v + iy) * 6; + int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; + int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; + +// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], +// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); + + float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导 + float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; + + if (ix == 0) { + sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; + sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; + sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; + + sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; + sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; + sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; + } else if (ix == sampleCnt_u - 1) { + sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; + sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; + sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; + + sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; + sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; + sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; + } else { + sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); + sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); + sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); + + sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); + sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); + sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); + } + + if (iy == 0) { + sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; + sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; + sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; + + sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; + sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; + sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; + } else if (iy == sampleCnt_v - 1) { + sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; + sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; + sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; + + sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; + sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; + sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; + } else { + sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); + sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); + sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); + + sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); + sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); + sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); + } + + float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; + normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); + normalization(uvx, uvy, uvz); + normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); + + + if (ix == 8 && iy == 9) + printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, + sndPdz_uu, + uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); +} + + +__host__ NurbsSurface::Surface::Surface(std::vector>> controlPoints, + std::vector knots_u, std::vector knots_v) { + this->knots_u = std::move(knots_u); + this->knots_v = std::move(knots_v); + this->controlPoints = std::move(controlPoints); + recordTime = false; + d_nTexture_u = nullptr; + d_nTexture_v = nullptr; + d_nTexture1_u = nullptr; + d_nTexture1_v = nullptr; + d_knots_u = nullptr; + d_knots_v = nullptr; + d_points = nullptr; + d_derivatives = nullptr; +} + +__host__ std::vector>> +NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return {}; + } + + // 构造指向device的controlPoints + const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); + const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); + auto *h_points = (float *) malloc(pointsBytes); + for (int i = 0; i < pointsCnt_u; i++) { + for (int j = 0; j < pointsCnt_v; j++) { + for (int k = 0; k < POINT_SIZE; k++) { + h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; + } + } + } + cudaMalloc((void **) &d_points, pointsBytes); + cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); + + // 构造指向device的knots + const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); + const int knotsBytes_u = knotsCnt_u * sizeof(float), knotsBytes_v = knotsCnt_v * sizeof(float); + auto *h_knots_u = (float *) malloc(knotsBytes_u), *h_knots_v = (float *) malloc(knotsBytes_v); + for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i]; + for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i]; + + cudaMalloc((void **) &d_knots_u, knotsBytes_u); + cudaMalloc((void **) &d_knots_v, knotsBytes_v); + cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice); + cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice); + + // 构造nTexture + cudaMalloc((void **) &d_nTexture_u, + sampleCnt_u * pointsCnt_u * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 + cudaMalloc((void **) &d_nTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); + + // 构造nTexture1 + cudaMalloc((void **) &d_nTexture1_u, sampleCnt_u * (pointsCnt_u + 1) * sizeof(float)); + cudaMalloc((void **) &d_nTexture1_v, sampleCnt_v * (pointsCnt_v + 1) * sizeof(float)); + + // 结果数组 + size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float); + float *d_res; + cudaMalloc((void **)&d_res, resBytes); + auto *h_res = (float *)malloc(resBytes); + + + // 构造g_basisTexture线程层级 + dim3 blockBasis(512); + dim3 gridBasis_u((sampleCnt_u + blockBasis.x - 1) / blockBasis.x); + dim3 gridBasis_v((sampleCnt_v + blockBasis.x - 1) / blockBasis.x); + + // 构造线程层级,调用核函数 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + // 记录用时 + double time_cost_device; + g_basisTexture<<>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, + sampleCnt_u); + cudaDeviceSynchronize(); + g_basisTexture<<>>(d_nTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, + sampleCnt_v); + cudaDeviceSynchronize(); + + if (recordTime) time_cost_device = get_time(); + g_evaluate <<>>(d_res, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, + knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of surface evaluation for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, + time_cost_device); + } + + cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); + std::vector>> res(sampleCnt_u, std::vector>(sampleCnt_v, std::vector(3, 0.))); + for(int i = 0; i < sampleCnt_u; i++) { + int baseIdx = i * sampleCnt_v * 3; + for(int j = 0; j < sampleCnt_v; j++) { + baseIdx += j * 3; + res[i][j][0] = h_res[baseIdx]; + res[i][j][1] = h_res[baseIdx + 1]; + res[i][j][2] = h_res[baseIdx + 2]; +// printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0], res[i][j][1], res[i][j][2]); + } + } + + // 释放内存 + safeFree(h_points); + safeFree(h_knots_u); + safeFree(h_knots_v); + safeCudaFree(d_res); + safeFree(h_res); + + return res; +} + +__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) { + // 先完成evaluation + evaluate(sampleCnt_u, sampleCnt_v); + + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + float *d_derTexture_u = nullptr; + float *d_derTexture_v = nullptr; + const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); + const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); + cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); + cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); + + // 构造切向量计算结果 + safeCudaFree(d_derivatives); + cudaMalloc((void **) &d_derivatives, + sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + // 构造g_basisTexture线程层级 + dim3 blockTex(512); + dim3 gridTex_u((sampleCnt_u + blockTex.x - 1) / blockTex.x); + dim3 gridTex_v((sampleCnt_v + blockTex.x - 1) / blockTex.x); + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + g_derTexture<<>>(d_derTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, + sampleCnt_u); + g_derTexture<<>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, + sampleCnt_v); + cudaDeviceSynchronize(); + g_derivative<<>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, + pointsCnt_u, + pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], + sampleCnt_u, + sampleCnt_v); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, + time_cost_device); + } + cudaFree(d_derTexture_u); + cudaFree(d_derTexture_v); +} + + + +__host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) { + // 先计算切向量 + derivative(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + g_curvature<<>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1]); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", + sampleCnt_u * sampleCnt_v, + time_cost_device); + } +} + +void NurbsSurface::Surface::setRecordTime(bool r) { + recordTime = r; +} + +NurbsSurface::Surface::~Surface() { + safeCudaFree(d_nTexture_u); + safeCudaFree(d_nTexture_v); + safeCudaFree(d_nTexture1_u); + safeCudaFree(d_nTexture1_v); + safeCudaFree(d_points); + safeCudaFree(d_knots_u); + safeCudaFree(d_knots_v); + cudaDeviceReset(); +} \ No newline at end of file diff --git a/main.cpp b/src/main.cpp similarity index 87% rename from main.cpp rename to src/main.cpp index 770b91c..81e8bc6 100644 --- a/main.cpp +++ b/src/main.cpp @@ -1,8 +1,9 @@ #include -#include "NurbsEvaluator.cuh" +#include "../include/device/NurbsCurve.cuh" +#include "../include/device/NurbsSurface.cuh" int main() { - NurbsSurface::Evaluator nurbsSurfaceEvaluator({ + NurbsSurface::Surface nurbsSurfaceEvaluator({ {{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}}, {{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}}, {{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}}, @@ -21,11 +22,12 @@ int main() { // {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, // {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); nurbsSurfaceEvaluator.setRecordTime(true); - nurbsSurfaceEvaluator.curvature(10001, 10001); + nurbsSurfaceEvaluator.curvature(101, 101); +// nurbsSurfaceEvaluator.evaluate(101, 101); printf("==============================\n"); - NurbsCurve::Evaluator nurbsCurveEvaluator( + NurbsCurve::Curve nurbsCurveEvaluator( {{-1, 0, 0, 0.3}, {0, 1, 6, 0.4}, {1, 0, 4, 0.5}, @@ -34,10 +36,13 @@ int main() { {4, -5, 0, 0.7}}, {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}); nurbsCurveEvaluator.setRecordTime(true); - nurbsCurveEvaluator.curvature(11); +// nurbsCurveEvaluator.curvature(11); + auto res = nurbsCurveEvaluator.evaluate(11); + for(auto & re : res) { + printf("%f, %f, %f\n", re[0], re[1], re[2]); + } printf("\n"); // nurbsCurveEvaluator.derivative(); return 0; -} - +} \ No newline at end of file diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..fa4ebfa --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,46 @@ +#include "../include/utils.h" +#include "cuda_runtime.h" + +#if IN_UNIX +double get_time() { + struct timeval tv{}; + double t; + + gettimeofday(&tv, (struct timezone *) nullptr); + t = tv.tv_sec + (double) tv.tv_usec * 1e-6; + + return t; +} +#else +double get_time() { + LARGE_INTEGER timer; + static LARGE_INTEGER fre; + static int init = 0; + double t; + + if (init != 1) { + QueryPerformanceFrequency(&fre); + init = 1; + } + + QueryPerformanceCounter(&timer); + + t = timer.QuadPart * 1. / fre.QuadPart; + + return t; +} +#endif + +void safeCudaFree(float *&p) { + if (p != nullptr) { + cudaFree(p); + p = nullptr; + } +} + +void safeFree(float *&p) { + if (p != nullptr) { + free(p); + p = nullptr; + } +} \ No newline at end of file diff --git a/utils.cpp b/utils.cpp deleted file mode 100644 index f115cd4..0000000 --- a/utils.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "utils.h" - -double utils::get_time_windows() { - LARGE_INTEGER timer; - static LARGE_INTEGER fre; - static int init = 0; - double t; - - if (init != 1) { - QueryPerformanceFrequency(&fre); - init = 1; - } - - QueryPerformanceCounter(&timer); - - t = timer.QuadPart * 1. / fre.QuadPart; - - return t; -} \ No newline at end of file diff --git a/utils.h b/utils.h deleted file mode 100644 index 4ab8535..0000000 --- a/utils.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef UNTITLED1_UTILS_H -#define UNTITLED1_UTILS_H -#include - - -namespace utils { - double get_time_windows(); -} - - -#endif //UNTITLED1_UTILS_H