20 changed files with 1120 additions and 1090 deletions
@ -1,38 +0,0 @@ |
|||
// |
|||
// Created by 14727 on 2022/11/19. |
|||
// |
|||
|
|||
#include "NurbsBasis.cuh" |
|||
|
|||
__device__ void d_basisFunction1(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { |
|||
int m = d_knotsCnt - 1; |
|||
for (int p = 0; p <= degree; p++) { |
|||
for (int i = 0; i + p <= m - 1; i++) { |
|||
if (p == 0) { |
|||
if ((u > knots[i] || d_floatEqual1(u, knots[i])) && (u < knots[i + 1]) |
|||
|| |
|||
d_floatEqual1(u, knots[i + 1]) && d_floatEqual1(u, knots[m])) { |
|||
N_Texture[p * m + i] = 1.0; |
|||
} else { |
|||
N_Texture[p * m + i] = 0.0; |
|||
} |
|||
} else { |
|||
float Nip_1 = N_Texture[(p - 1) * m + i]; |
|||
float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; |
|||
float left = d_floatEqual1(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / |
|||
(knots[i + p] - knots[i]); |
|||
float right = d_floatEqual1(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / |
|||
(knots[i + p + 1] - knots[i + 1]); |
|||
N_Texture[p * m + i] = left + right; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
__device__ bool d_floatEqual1(float a, float b) { |
|||
return abs(a - b) < 0.00001; |
|||
} |
|||
|
|||
void NurbsBasis::myPrint11(int a, int b) { |
|||
printf("In NurbsBasis %d!!!\n", a * b); |
|||
} |
|||
@ -1,28 +0,0 @@ |
|||
// |
|||
// Created by 14727 on 2022/11/19. |
|||
// |
|||
|
|||
#ifndef NURBSEVALUATOR_NURBSBASIS_CUH |
|||
#define NURBSEVALUATOR_NURBSBASIS_CUH |
|||
|
|||
#include <cuda_runtime.h> |
|||
#include "cstdio" |
|||
|
|||
/** |
|||
* 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 |
|||
* @param N_Texture 结果返回在N_Texture中 |
|||
*/ |
|||
extern __device__ void d_basisFunction1(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt); |
|||
|
|||
/** |
|||
* device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 |
|||
* @return true:相等 |
|||
*/ |
|||
extern __device__ bool d_floatEqual1(float a, float b); |
|||
|
|||
namespace NurbsBasis { |
|||
void myPrint11(int a, int b); |
|||
}; |
|||
|
|||
|
|||
#endif //NURBSEVALUATOR_NURBSBASIS_CUH |
|||
@ -1,791 +0,0 @@ |
|||
// |
|||
// Created by 14727 on 2022/11/7. |
|||
// |
|||
|
|||
#include <utility> |
|||
|
|||
#include "NurbsEvaluator.cuh" |
|||
#include "cstdio" |
|||
#include "utils.h" |
|||
//#include "NurbsBasis.cuh" |
|||
|
|||
__device__ void normalization(float &a, float &b, float &c) { |
|||
float sumA = a * a; |
|||
float sumB = b * b; |
|||
float sumC = c * c; |
|||
float sum = sumA + sumB + sumC; |
|||
a = sqrt(sumA / sum); |
|||
b = sqrt(sumB / sum); |
|||
c = sqrt(sumC / sum); |
|||
} |
|||
|
|||
//extern __device__ void NurbsBasis::d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {}; |
|||
|
|||
__host__ NurbsSurface::Evaluator::Evaluator(std::vector<std::vector<std::vector<float>>> controlPoints, |
|||
std::vector<float> knots_u, std::vector<float> knots_v) { |
|||
this->knots_u = std::move(knots_u); |
|||
this->knots_v = std::move(knots_v); |
|||
this->controlPoints = std::move(controlPoints); |
|||
recordTime = false; |
|||
d_nTexture_u = nullptr; |
|||
d_nTexture_v = nullptr; |
|||
d_nTexture1_u = nullptr; |
|||
d_nTexture1_v = nullptr; |
|||
d_knots_u = nullptr; |
|||
d_knots_v = nullptr; |
|||
d_points = nullptr; |
|||
d_derivatives = nullptr; |
|||
} |
|||
|
|||
__host__ std::vector<std::map<std::pair<float, float>, std::vector<float>>> |
|||
NurbsSurface::Evaluator::evaluate(int sampleCnt_u, int sampleCnt_v) { |
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return {}; |
|||
} |
|||
|
|||
// 构造指向device的controlPoints |
|||
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); |
|||
const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); |
|||
auto *h_points = (float *) malloc(pointsBytes); |
|||
for (int i = 0; i < pointsCnt_u; i++) { |
|||
for (int j = 0; j < pointsCnt_v; j++) { |
|||
for (int k = 0; k < POINT_SIZE; k++) { |
|||
h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; |
|||
} |
|||
} |
|||
} |
|||
cudaMalloc((void **) &d_points, pointsBytes); |
|||
cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造指向device的knots |
|||
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); |
|||
const int knotsBytes_u = knotsCnt_u * sizeof(float), knotsBytes_v = knotsCnt_v * sizeof(float); |
|||
auto *h_knots_u = (float *) malloc(knotsBytes_u), *h_knots_v = (float *) malloc(knotsBytes_v); |
|||
for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i]; |
|||
for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i]; |
|||
|
|||
cudaMalloc((void **) &d_knots_u, knotsBytes_u); |
|||
cudaMalloc((void **) &d_knots_v, knotsBytes_v); |
|||
cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice); |
|||
cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造nTexture |
|||
cudaMalloc((void **) &d_nTexture_u, |
|||
sampleCnt_u * pointsCnt_u * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 |
|||
cudaMalloc((void **) &d_nTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); |
|||
|
|||
// 构造nTexture1 |
|||
cudaMalloc((void **) &d_nTexture1_u, sampleCnt_u * (pointsCnt_u + 1) * sizeof(float)); |
|||
cudaMalloc((void **) &d_nTexture1_v, sampleCnt_v * (pointsCnt_v + 1) * sizeof(float)); |
|||
|
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockBasis(512); |
|||
dim3 gridBasis_u((sampleCnt_u + blockBasis.x - 1) / blockBasis.x); |
|||
dim3 gridBasis_v((sampleCnt_v + blockBasis.x - 1) / blockBasis.x); |
|||
|
|||
// 构造线程层级,调用核函数 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
g_basisTexture<<<gridBasis_u, blockBasis>>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, |
|||
sampleCnt_u); |
|||
cudaDeviceSynchronize(); |
|||
g_basisTexture<<<gridBasis_v, blockBasis>>>(d_nTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); |
|||
|
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
g_evaluate <<<grid, block>>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, |
|||
knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of surface evaluation for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
|
|||
// 释放内存 |
|||
free(h_points); |
|||
free(h_knots_u); |
|||
free(h_knots_v); |
|||
printf("First derivatives and normal vectors calculated by GPU:\n"); |
|||
return {}; |
|||
} |
|||
|
|||
|
|||
__host__ std::vector<std::map<float, std::vector<float>>> |
|||
NurbsCurve::Evaluator::evaluate(int sampleCnt) { |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return {}; |
|||
} |
|||
// 构造指向device的controlPoints |
|||
const int pointsCnt = controlPoints.size(); |
|||
const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); |
|||
auto *h_points = (float *) malloc(pointsBytes); |
|||
for (int i = 0; i < pointsCnt; i++) { |
|||
for (int j = 0; j < POINT_SIZE; j++) { |
|||
h_points[i * POINT_SIZE + j] = controlPoints[i][j]; |
|||
} |
|||
} |
|||
myCudaFree(d_points); // 注意内存管理 |
|||
cudaMalloc((void **) &d_points, pointsBytes); |
|||
cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造指向device的knots |
|||
const int knotsCnt = knots.size(); |
|||
const int knotsBytes = knotsCnt * sizeof(float); |
|||
auto *h_knots = (float *) malloc(knotsBytes); |
|||
for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; |
|||
myCudaFree(d_knots); // 注意内存管理 |
|||
cudaMalloc((void **) &d_knots, knotsBytes); |
|||
cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 分配nTexture的内存。只需要GPU内存 |
|||
// float *d_nTexture = nullptr; |
|||
myCudaFree(d_nTexture); // 注意内存管理 |
|||
cudaMalloc((void **) &d_nTexture, |
|||
sampleCnt * pointsCnt * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 |
|||
|
|||
// 分配nTexture1的内存。只需要GPU内存 |
|||
// float *d_nTexture1 = nullptr; |
|||
myCudaFree(d_nTexture1); // 注意内存管理 |
|||
cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); |
|||
|
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockBasis(512); |
|||
dim3 gridBasis((sampleCnt + blockBasis.x - 1) / blockBasis.x); |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
printf("there..\n"); |
|||
g_basisTexture <<<gridBasis, blockBasis>>>(d_nTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); |
|||
// cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 |
|||
cudaDeviceSynchronize(); |
|||
printf("here..\n"); |
|||
g_evaluate <<<grid, block>>>(d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); |
|||
// g_test<<<1,6>>>(d_nTextureCpy); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device); |
|||
} |
|||
free(h_points); |
|||
free(h_knots); |
|||
return {}; |
|||
} |
|||
|
|||
__host__ void NurbsSurface::Evaluator::derivative(int sampleCnt_u, int sampleCnt_v) { |
|||
// 先完成evaluation |
|||
evaluate(sampleCnt_u, sampleCnt_v); |
|||
|
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
float *d_derTexture_u = nullptr; |
|||
float *d_derTexture_v = nullptr; |
|||
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); |
|||
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); |
|||
cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); |
|||
cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); |
|||
|
|||
// 构造切向量计算结果 |
|||
myCudaFree(d_derivatives); |
|||
cudaMalloc((void **) &d_derivatives, |
|||
sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockTex(512); |
|||
dim3 gridTex_u((sampleCnt_u + blockTex.x - 1) / blockTex.x); |
|||
dim3 gridTex_v((sampleCnt_v + blockTex.x - 1) / blockTex.x); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
g_derTexture<<<gridTex_u, blockTex>>>(d_derTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, |
|||
sampleCnt_u); |
|||
g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); |
|||
g_derivative<<<grid, block>>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, |
|||
pointsCnt_u, |
|||
pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], |
|||
sampleCnt_u, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
cudaFree(d_derTexture_u); |
|||
cudaFree(d_derTexture_v); |
|||
} |
|||
|
|||
__host__ void NurbsCurve::Evaluator::derivative(int sampleCnt) { |
|||
// 先完成evaluation |
|||
evaluate(sampleCnt); |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
float *d_derTexture = nullptr; |
|||
const int pointsCnt = controlPoints.size(); |
|||
const int knotsCnt = knots.size(); |
|||
cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockTex(512); |
|||
dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); |
|||
|
|||
|
|||
// 构造切向量计算结果 |
|||
myCudaFree(d_derivatives); |
|||
cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
g_derTexture<<<gridTex, blockTex>>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); |
|||
cudaDeviceSynchronize(); |
|||
g_derivative<<<grid, block>>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, |
|||
knots[knotsCnt - 1], sampleCnt); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of curve first derivative calculating for %d samples: %lf\n", sampleCnt, |
|||
time_cost_device); |
|||
} |
|||
cudaFree(d_derTexture); |
|||
} |
|||
|
|||
__host__ void NurbsSurface::Evaluator::curvature(int sampleCnt_u, int sampleCnt_v) { |
|||
// 先计算切向量 |
|||
derivative(sampleCnt_u, sampleCnt_v); |
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], |
|||
knots_v[knots_v.size() - 1]); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", |
|||
sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
} |
|||
|
|||
__host__ void NurbsCurve::Evaluator::curvature(int sampleCnt) { |
|||
// 先计算切向量 |
|||
derivative(sampleCnt); |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = utils::get_time_windows(); |
|||
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = utils::get_time_windows() - time_cost_device; |
|||
printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, |
|||
time_cost_device); |
|||
} |
|||
} |
|||
|
|||
|
|||
//__global__ void |
|||
//NurbsSurface::g_evaluate(const float *d_points, const float *d_knots_u, const float *d_knots_v, |
|||
// int d_pointsCnt_u, |
|||
// int d_pointsCnt_v, int d_POINT_SIZE, int d_knotsCnt_u, int d_knotsCnt_v, |
|||
// int d_sampleCnt_u, int d_sampleCnt_v) { |
|||
//// printf(" surface calculating... \n"); |
|||
// // 二维grid和二维的block |
|||
// int ix = int(blockIdx.x * blockDim.x + threadIdx.x); |
|||
// int iy = int(blockIdx.y * blockDim.y + threadIdx.y); |
|||
// |
|||
// float d_paramCeil_u = d_knots_u[d_knotsCnt_u - 1]; |
|||
// float d_paramCeil_v = d_knots_v[d_knotsCnt_v - 1]; |
|||
// |
|||
// float u = ix * d_paramCeil_u / (d_sampleCnt_u - 1); |
|||
// float v = iy * d_paramCeil_v / (d_sampleCnt_v - 1); |
|||
// |
|||
// if (u > 1.0 * d_paramCeil_u || v > 1.0 * d_paramCeil_v) { |
|||
// return; |
|||
// } |
|||
// |
|||
// int d_degree_u = d_knotsCnt_u - 1 - d_pointsCnt_u; |
|||
// int d_degree_v = d_knotsCnt_v - 1 - d_pointsCnt_v; |
|||
// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree |
|||
// auto *N_Texture_U = (float *) malloc((d_degree_u + 1) * (d_knotsCnt_u - 1) * sizeof(float)); |
|||
// auto *N_Texture_V = (float *) malloc((d_degree_v + 1) * (d_knotsCnt_v - 1) * sizeof(float)); |
|||
// d_basisFunction(N_Texture_U, d_knots_u, u, d_degree_u, d_knotsCnt_u); |
|||
// d_basisFunction(N_Texture_V, d_knots_v, v, d_degree_v, d_knotsCnt_v); |
|||
// float x = 0., y = 0., z = 0.; |
|||
// for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
// for (int j = 0; j < d_pointsCnt_v; j++) { |
|||
// float N_U = N_Texture_U[d_degree_u * (d_knotsCnt_u - 1) + i]; |
|||
// float N_V = N_Texture_V[d_degree_v * (d_knotsCnt_v - 1) + j]; |
|||
// int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
// x += N_U * N_V * d_points[idx]; |
|||
// y += N_U * N_V * d_points[idx + 1]; |
|||
// z += N_U * N_V * d_points[idx + 2]; |
|||
// } |
|||
// } |
|||
// printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 |
|||
// free(N_Texture_U); |
|||
// free(N_Texture_V); |
|||
//} |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, |
|||
int d_sampleCnt_v) { |
|||
|
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); |
|||
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); |
|||
|
|||
if (u > 1.0 * d_lastKnot_u || v > 1.0 * d_lastKnot_v) { |
|||
return; |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., sumW = 0.; |
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; |
|||
for (int j = 0; j < d_pointsCnt_v; j++) { |
|||
float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; |
|||
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float w = d_points[idx + 3]; |
|||
x += N_U * N_V * w * d_points[idx]; |
|||
y += N_U * N_V * w * d_points[idx + 1]; |
|||
z += N_U * N_V * w * d_points[idx + 2]; |
|||
sumW += N_U * N_V * w; |
|||
} |
|||
} |
|||
x = x / sumW; |
|||
y = y / sumW; |
|||
z = z / sumW; |
|||
// printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, |
|||
const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, |
|||
int d_sampleCnt_u, int d_sampleCnt_v) { |
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) { |
|||
return; |
|||
} |
|||
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); |
|||
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); |
|||
|
|||
float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; |
|||
float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; |
|||
|
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
for (int j = 0; j < d_pointsCnt_u; j++) { |
|||
int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; |
|||
float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; |
|||
float wij = d_points[baseIdx + 3]; |
|||
nubsPdx_u += factor_u * wij * d_points[baseIdx]; |
|||
nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; |
|||
nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; |
|||
nubsPdw_u += factor_u * wij; |
|||
|
|||
nubsPdx_v += factor_v * wij * d_points[baseIdx]; |
|||
nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; |
|||
nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; |
|||
nubsPdw_v += factor_v * wij; |
|||
} |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., w = 0.; |
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
float N_U = nTexture_u[ix * d_pointsCnt_u + i]; |
|||
for (int j = 0; j < d_pointsCnt_v; j++) { |
|||
float N_V = nTexture_v[iy * d_pointsCnt_v + j]; |
|||
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float wij = d_points[idx + 3]; |
|||
x += N_U * N_V * wij * d_points[idx]; |
|||
y += N_U * N_V * wij * d_points[idx + 1]; |
|||
z += N_U * N_V * wij * d_points[idx + 2]; |
|||
w += N_U * N_V * wij; |
|||
} |
|||
} |
|||
|
|||
float w2 = w * w; |
|||
float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; |
|||
float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; |
|||
float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; |
|||
|
|||
float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; |
|||
float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; |
|||
float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; |
|||
// float pdz_u = (nubsPdz_u * w - z ) |
|||
|
|||
int baseIdx = (ix * d_sampleCnt_v + iy) * 6; |
|||
derivatives[baseIdx] = pdx_u; |
|||
derivatives[baseIdx + 1] = pdy_u; |
|||
derivatives[baseIdx + 2] = pdz_u; |
|||
derivatives[baseIdx + 3] = pdx_v; |
|||
derivatives[baseIdx + 4] = pdy_v; |
|||
derivatives[baseIdx + 5] = pdz_v; |
|||
|
|||
float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = |
|||
pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 |
|||
if((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || (ix == 8 && iy == 10)) |
|||
printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, |
|||
pdz_v, x_n, y_n, z_n); |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, |
|||
float lastKnot_v) { |
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
if (ix >= sampleCnt_u || iy >= sampleCnt_v) { |
|||
return; |
|||
} |
|||
|
|||
|
|||
float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); |
|||
float u = ix * step_u, v = iy * step_v; |
|||
|
|||
int baseIdx = (ix * sampleCnt_v + iy) * 6; |
|||
int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; |
|||
int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; |
|||
|
|||
// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], |
|||
// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); |
|||
|
|||
float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导 |
|||
float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; |
|||
|
|||
if (ix == 0) { |
|||
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; |
|||
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; |
|||
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; |
|||
|
|||
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; |
|||
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; |
|||
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; |
|||
} else if (ix == sampleCnt_u - 1) { |
|||
sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; |
|||
sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; |
|||
sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; |
|||
|
|||
sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; |
|||
sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; |
|||
sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; |
|||
} else { |
|||
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); |
|||
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); |
|||
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); |
|||
|
|||
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); |
|||
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); |
|||
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); |
|||
} |
|||
|
|||
if (iy == 0) { |
|||
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; |
|||
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; |
|||
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; |
|||
|
|||
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; |
|||
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; |
|||
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; |
|||
} else if (iy == sampleCnt_v - 1) { |
|||
sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; |
|||
sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; |
|||
sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; |
|||
|
|||
sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; |
|||
sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; |
|||
sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; |
|||
} else { |
|||
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); |
|||
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); |
|||
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); |
|||
|
|||
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); |
|||
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); |
|||
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); |
|||
} |
|||
|
|||
float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; |
|||
normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); |
|||
normalization(uvx, uvy, uvz); |
|||
normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); |
|||
|
|||
|
|||
if(ix == 8 && iy == 9) |
|||
printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, sndPdz_uu, |
|||
uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d_pointsCnt, |
|||
const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { |
|||
// printf(" curve calculating... \n"); |
|||
// 二维grid和一维的block |
|||
// int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
float u = idx * d_lastKnot / (d_sampleCnt - 1); |
|||
if (u > 1.0 * d_lastKnot) { |
|||
return; |
|||
} |
|||
// |
|||
// int d_degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree |
|||
// auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); |
|||
// d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); |
|||
float x = 0., y = 0., z = 0., sumW = 0.; |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float N = NTexture[idx * d_pointsCnt + i]; |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float w = d_points[baseIdx + 3]; |
|||
x += N * w * d_points[baseIdx]; |
|||
y += N * w * d_points[baseIdx + 1]; |
|||
z += N * w * d_points[baseIdx + 2]; |
|||
sumW += N * w; |
|||
} |
|||
x = x / sumW; |
|||
y = y / sumW; |
|||
z = z / sumW; |
|||
printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, |
|||
int d_pointsCnt, int d_POINT_SIZE, |
|||
float d_lastKnot, int d_sampleCnt) { |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
if (idx >= d_sampleCnt) return; |
|||
float u = idx * d_lastKnot / (d_sampleCnt - 1); |
|||
float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; |
|||
// printf("POINT_SIZE: %d\n", d_POINT_SIZE); |
|||
|
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float nFactor = derTexture[idx * d_pointsCnt + i]; |
|||
float wi = d_points[baseIdx + 3]; |
|||
nubs_dx += nFactor * wi * d_points[baseIdx]; |
|||
nubs_dy += nFactor * wi * d_points[baseIdx + 1]; |
|||
nubs_dz += nFactor * wi * d_points[baseIdx + 2]; |
|||
nubs_dw += nFactor * wi; |
|||
// printf("(x, y, z): (%g, %g, %g)\n", d_points[baseIdx], d_points[baseIdx + 1], d_points[baseIdx + 2]); |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., w = 0.; |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float N = nTexture[idx * d_pointsCnt + i]; |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float wi = d_points[baseIdx + 3]; |
|||
x += N * wi * d_points[baseIdx]; |
|||
y += N * wi * d_points[baseIdx + 1]; |
|||
z += N * wi * d_points[baseIdx + 2]; |
|||
w += N * wi; |
|||
} |
|||
float dx = (nubs_dx * w - x * nubs_dw) / (w * w); |
|||
float dy = (nubs_dy * w - y * nubs_dw) / (w * w); |
|||
float dz = (nubs_dz * w - z * nubs_dw) / (w * w); |
|||
int baseIdx = idx * 3; |
|||
derivatives[baseIdx] = dx; |
|||
derivatives[baseIdx + 1] = dy; |
|||
derivatives[baseIdx + 2] = dz; |
|||
printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); |
|||
} |
|||
|
|||
__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
if (idx >= sampleCnt) return; |
|||
float step = lastKnot / (sampleCnt - 1); |
|||
float u = idx * step; |
|||
float sndPdx, sndPdy, sndPdz; // 二阶导 |
|||
int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; |
|||
if (idx == 0) { |
|||
sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; |
|||
sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; |
|||
sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; |
|||
} else if (idx == sampleCnt - 1) { |
|||
sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; |
|||
sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; |
|||
sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; |
|||
} else { |
|||
sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); |
|||
sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); |
|||
sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); |
|||
} |
|||
printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); |
|||
} |
|||
|
|||
__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt) { |
|||
// 一维grid和一维block |
|||
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 |
|||
float d_paramCeil = d_knots[d_knotsCnt - 1]; |
|||
float u = idx * d_paramCeil / (d_sampleCnt - 1); |
|||
if (u > 1.0 * d_paramCeil) { |
|||
return; |
|||
} |
|||
int d_degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); |
|||
d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i]; |
|||
nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i]; |
|||
// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); |
|||
} |
|||
nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + |
|||
d_pointsCnt]; // nTexture1多记录一列数据 |
|||
free(N_dp); |
|||
} |
|||
|
|||
|
|||
__global__ void |
|||
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt) { |
|||
// 一维grid和一维block |
|||
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 |
|||
if (idx >= d_sampleCnt) { |
|||
return; |
|||
} |
|||
int degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
|
|||
// printf("degree: %d\n", degree); |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float left = d_floatEqual(d_knots[i + degree], d_knots[i]) ? 0 : |
|||
nTexture1[idx * (d_pointsCnt + 1) + i] * (degree - 1) / (d_knots[i + degree] - d_knots[i]); |
|||
float right = d_floatEqual(d_knots[i + degree + 1], d_knots[i + 1]) ? 0 : |
|||
nTexture1[idx * (d_pointsCnt + 1) + i + 1] * (degree - 1) / |
|||
(d_knots[i + degree + 1] - d_knots[i + 1]); |
|||
derTexture[idx * d_pointsCnt + i] = left - right; |
|||
// printf("<%d, %d> -- %g \n", idx, i, left - right); |
|||
// printf("nTex1: %g \n", nTexture1[idx * (d_pointsCnt + 1) + i]); |
|||
} |
|||
} |
|||
|
|||
__host__ NurbsCurve::Evaluator::Evaluator(std::vector<std::vector<float>> controlPoints, |
|||
std::vector<float> knots) { |
|||
this->knots = std::move(knots); |
|||
this->controlPoints = std::move(controlPoints); |
|||
recordTime = false; |
|||
d_nTexture = nullptr; |
|||
d_nTexture1 = nullptr; |
|||
d_points = nullptr; |
|||
d_knots = nullptr; |
|||
d_derivatives = nullptr; |
|||
} |
|||
|
|||
|
|||
__device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { |
|||
int m = d_knotsCnt - 1; |
|||
for (int p = 0; p <= degree; p++) { |
|||
for (int i = 0; i + p <= m - 1; i++) { |
|||
if (p == 0) { |
|||
if ((u > knots[i] || d_floatEqual(u, knots[i])) && (u < knots[i + 1]) |
|||
|| |
|||
d_floatEqual(u, knots[i + 1]) && d_floatEqual(u, knots[m])) { |
|||
N_Texture[p * m + i] = 1.0; |
|||
} else { |
|||
N_Texture[p * m + i] = 0.0; |
|||
} |
|||
} else { |
|||
float Nip_1 = N_Texture[(p - 1) * m + i]; |
|||
float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; |
|||
float left = d_floatEqual(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / |
|||
(knots[i + p] - knots[i]); |
|||
float right = d_floatEqual(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / |
|||
(knots[i + p + 1] - knots[i + 1]); |
|||
N_Texture[p * m + i] = left + right; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
__device__ bool d_floatEqual(float a, float b) { |
|||
return abs(a - b) < 0.00001; |
|||
} |
|||
|
|||
__host__ void myCudaFree(float *&p) { |
|||
if (p != nullptr) { |
|||
cudaFree(p); |
|||
p = nullptr; |
|||
} |
|||
} |
|||
|
|||
|
|||
void NurbsCurve::Evaluator::setRecordTime(bool r) { |
|||
recordTime = r; |
|||
} |
|||
|
|||
void NurbsSurface::Evaluator::setRecordTime(bool r) { |
|||
recordTime = r; |
|||
} |
|||
|
|||
NurbsSurface::Evaluator::~Evaluator() { |
|||
myCudaFree(d_nTexture_u); |
|||
myCudaFree(d_nTexture_v); |
|||
myCudaFree(d_nTexture1_u); |
|||
myCudaFree(d_nTexture1_v); |
|||
myCudaFree(d_points); |
|||
myCudaFree(d_knots_u); |
|||
myCudaFree(d_knots_v); |
|||
cudaDeviceReset(); |
|||
} |
|||
|
|||
NurbsCurve::Evaluator::~Evaluator() { |
|||
myCudaFree(d_nTexture); |
|||
myCudaFree(d_nTexture1); |
|||
myCudaFree(d_points); |
|||
myCudaFree(d_knots); |
|||
cudaDeviceReset(); |
|||
} |
|||
@ -1,191 +0,0 @@ |
|||
#ifndef UNTITLED1_NURBSEVALUATOR_CUH |
|||
#define UNTITLED1_NURBSEVALUATOR_CUH |
|||
|
|||
#include <cuda_runtime.h> |
|||
#include <thrust/device_vector.h> |
|||
#include <vector> |
|||
#include <map> |
|||
|
|||
const int POINT_SIZE = 4; |
|||
|
|||
/** |
|||
* 保证释放后的指针指向空。这样一来保证指针不乱指,free的时候不会出错、二来可以判断指针是否已经free |
|||
* 注意指针是引用传参,因为要把指针本身置空 |
|||
*/ |
|||
__host__ void myCudaFree(float *&p); |
|||
|
|||
namespace NurbsSurface { |
|||
/** |
|||
* 曲线计算的核函数 |
|||
* @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) |
|||
*/ |
|||
__global__ static void |
|||
g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, |
|||
int d_sampleCnt_v); |
|||
|
|||
__global__ static void |
|||
g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, |
|||
const float *nTexture_v, |
|||
const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, |
|||
float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); |
|||
|
|||
__global__ static void |
|||
g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); |
|||
|
|||
class Evaluator { |
|||
private: |
|||
std::vector<std::vector<std::vector<float>>> controlPoints; |
|||
float *d_points; |
|||
std::vector<float> knots_u; |
|||
std::vector<float> knots_v; |
|||
float *d_knots_u; |
|||
float *d_knots_v; |
|||
bool recordTime; |
|||
|
|||
float *d_nTexture_u; // u方向指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture_v; // v方向指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 |
|||
|
|||
float *d_derivatives; // 一阶导计算结果 |
|||
|
|||
// int sampleCnt_u; |
|||
// int sampleCnt_v; |
|||
|
|||
public: |
|||
/** |
|||
* 构造函数 |
|||
* @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3] |
|||
* @param knots_u u方向knots |
|||
* @param knots_v v方向knots |
|||
*/ |
|||
__host__ explicit Evaluator(std::vector<std::vector<std::vector<float>>> controlPoints, |
|||
std::vector<float> knots_u, std::vector<float> knots_v); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算的方法 |
|||
* @param sampleCnt_u u方向采样数目 |
|||
* @param sampleCnt_v v方向采样数目 |
|||
* @return 由 map 组成的vector{<<u, v>, {x, y, z}>} |
|||
*/ |
|||
__host__ std::vector<std::map<std::pair<float, float>, std::vector<float>>> |
|||
evaluate(int sampleCnt_u_, int sampleCnt_v_); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 |
|||
*/ |
|||
__host__ void derivative(int sampleCnt_u, int sampleCnt_v); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 |
|||
*/ |
|||
__host__ void curvature(int sampleCnt_u, int sampleCnt_v); |
|||
|
|||
void setRecordTime(bool r); |
|||
|
|||
~Evaluator(); |
|||
|
|||
}; |
|||
} |
|||
|
|||
/** |
|||
* 曲线部分 |
|||
*/ |
|||
namespace NurbsCurve { |
|||
|
|||
__global__ void g_test(float *nTexture); |
|||
|
|||
/** |
|||
* 曲线计算的核函数 |
|||
* @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) |
|||
*/ |
|||
__global__ static void |
|||
g_evaluate(const float *NTexture, const float *d_points, int d_pointsCnt, int d_pointSize, |
|||
float d_lastKnot, int d_sampleCnt); |
|||
|
|||
__global__ static void |
|||
g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, |
|||
int d_pointsCnt, int d_pointSize, float d_lastKnot, |
|||
int d_sampleCnt); |
|||
|
|||
__global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot); |
|||
|
|||
class Evaluator { |
|||
private: |
|||
std::vector<std::vector<float>> controlPoints; |
|||
std::vector<float> knots; |
|||
float *d_knots; |
|||
float *d_points; |
|||
bool recordTime; |
|||
|
|||
float *d_nTexture; // 指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1; // 指向度为p-1时的device中的nurbs基函数矩阵 |
|||
|
|||
float *d_derivatives{}; // 一阶导计算结果 |
|||
|
|||
public: |
|||
/** |
|||
* 构造函数 |
|||
* @param controlPoints 控制点矩阵[pointsCnt][3] |
|||
*/ |
|||
__host__ explicit Evaluator(std::vector<std::vector<float>> controlPoints, std::vector<float> knots); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行进行evaluation的方法 |
|||
* @param sampleCnt_ 在参数域内均匀采样的采样数,它会更新成员变量中的sampleCnt |
|||
* @return 由 map 组成的vector{<u, {x, y, z}>} |
|||
*/ |
|||
__host__ std::vector<std::map<float, std::vector<float>>> evaluate(int sampleCnt_); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 |
|||
*/ |
|||
__host__ void derivative(int sampleCnt); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 |
|||
*/ |
|||
__host__ void curvature(int sampleCnt); |
|||
|
|||
__host__ ~Evaluator(); |
|||
|
|||
void setRecordTime(bool r); |
|||
}; |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
/** |
|||
* 计算并保存基函数值 |
|||
* @param nTexture 记录度数为p的基函数值,规模为【sampleCnt,pointsCnt】 |
|||
* @param nTexture1 记录度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 |
|||
*/ |
|||
__global__ static void |
|||
g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt); |
|||
|
|||
/** |
|||
* 计算并保存基函数对采样点切向量的分量值 |
|||
* @param derTexture 记录度数为p的Nurbs基函数对采样点切向量的分量值,大小为【sampleCnt,pointsCnt】 |
|||
* @param nTexture1 度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 |
|||
*/ |
|||
__global__ static void |
|||
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt); |
|||
|
|||
/** |
|||
* 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 |
|||
* @param N_Texture 结果返回在N_Texture中 |
|||
*/ |
|||
__device__ void d_basisFunction(float *nTexture, const float *knots, float u, int degree, int d_knotsCnt); |
|||
|
|||
/** |
|||
* device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 |
|||
* @return true:相等 |
|||
*/ |
|||
__device__ bool d_floatEqual(float a, float b); |
|||
|
|||
#endif |
|||
@ -0,0 +1,23 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
|
|||
#ifndef NURBSEVALUATOR_DEVICEUTILS_CUH |
|||
#define NURBSEVALUATOR_DEVICEUTILS_CUH |
|||
|
|||
|
|||
__device__ void d_safeFree(float * &p); |
|||
|
|||
/** |
|||
* device中判断两个浮点数是否相等。与CPU中一样,GPU中的浮点数也存在很小的误差,直接使用==判断往往容易将相等误判为不等 |
|||
* @return true:相等 |
|||
*/ |
|||
__device__ bool d_floatEqual(float a, float b); |
|||
|
|||
/** |
|||
* 正则归一化,一般用于方向向量的归一化 |
|||
*/ |
|||
__device__ void normalization(float &a, float &b, float &c); |
|||
|
|||
|
|||
#endif //NURBSEVALUATOR_DEVICEUTILS_CUH |
|||
@ -0,0 +1,32 @@ |
|||
// |
|||
// Created by 14727 on 2022/11/19. |
|||
// |
|||
|
|||
#ifndef NURBSEVALUATOR_NURBSCOMMON_CUH |
|||
#define NURBSEVALUATOR_NURBSCOMMON_CUH |
|||
|
|||
/** |
|||
* 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 |
|||
* @param N_Texture 结果返回在N_Texture中 |
|||
*/ |
|||
__device__ void d_basisFunction(float *nTexture, const float *knots, float u, int degree, int d_knotsCnt); |
|||
|
|||
/** |
|||
* 计算并保存基函数值 |
|||
* @param nTexture 记录度数为p的基函数值,规模为【sampleCnt,pointsCnt】 |
|||
* @param nTexture1 记录度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 |
|||
*/ |
|||
__global__ void |
|||
g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt); |
|||
|
|||
/** |
|||
* 计算并保存基函数对采样点切向量的分量值 |
|||
* @param derTexture 记录度数为p的Nurbs基函数对采样点切向量的分量值,大小为【sampleCnt,pointsCnt】 |
|||
* @param nTexture1 度数为p-1的基函数值,规模为【sampleCnt+1,pointsCnt】 |
|||
*/ |
|||
__global__ void |
|||
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt); |
|||
|
|||
#endif //NURBSEVALUATOR_NURBSCOMMON_CUH |
|||
@ -0,0 +1,73 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
|
|||
#ifndef NURBSEVALUATOR_NURBSCURVE_CUH |
|||
#define NURBSEVALUATOR_NURBSCURVE_CUH |
|||
|
|||
#include <cuda_runtime.h> |
|||
#include <vector> |
|||
//#include <map> |
|||
|
|||
namespace NurbsCurve { |
|||
const int POINT_SIZE = 4; |
|||
/** |
|||
* 曲线计算的核函数 |
|||
* @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) |
|||
*/ |
|||
__global__ static void |
|||
g_evaluate(float *res, const float *NTexture, const float *d_points, int d_pointsCnt, int d_pointSize, |
|||
float d_lastKnot, int d_sampleCnt); |
|||
|
|||
__global__ static void |
|||
g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, |
|||
int d_pointsCnt, int d_pointSize, float d_lastKnot, |
|||
int d_sampleCnt); |
|||
|
|||
__global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot); |
|||
|
|||
class Curve { |
|||
private: |
|||
std::vector<std::vector<float>> controlPoints; |
|||
std::vector<float> knots; |
|||
float *d_knots; |
|||
float *d_points; |
|||
bool recordTime; |
|||
|
|||
float *d_nTexture; // 指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1; // 指向度为p-1时的device中的nurbs基函数矩阵 |
|||
|
|||
float *d_derivatives{}; // 一阶导计算结果 |
|||
|
|||
public: |
|||
/** |
|||
* 构造函数 |
|||
* @param controlPoints 控制点矩阵[pointsCnt][3] |
|||
*/ |
|||
explicit Curve(std::vector<std::vector<float>> controlPoints, std::vector<float> knots); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行进行evaluation的方法 |
|||
* @param sampleCnt_ 在参数域内均匀采样的采样数,它会更新成员变量中的sampleCnt |
|||
* @return 由 map 组成的vector{<u, {x, y, z}>} |
|||
*/ |
|||
std::vector<std::vector<float>> evaluate(int sampleCnt_); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 |
|||
*/ |
|||
void derivative(int sampleCnt); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 |
|||
*/ |
|||
void curvature(int sampleCnt); |
|||
|
|||
~Curve(); |
|||
|
|||
void setRecordTime(bool r); |
|||
}; |
|||
} |
|||
|
|||
|
|||
#endif //NURBSEVALUATOR_NURBSCURVE_CUH |
|||
@ -0,0 +1,85 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
|
|||
#ifndef NURBSEVALUATOR_NURBSSURFACE_CUH |
|||
#define NURBSEVALUATOR_NURBSSURFACE_CUH |
|||
#include <vector> |
|||
#include "cuda_runtime.h" |
|||
|
|||
namespace NurbsSurface { |
|||
const int POINT_SIZE = 4; |
|||
/** |
|||
* 曲线计算的核函数 |
|||
* @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) |
|||
*/ |
|||
__global__ static void |
|||
g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, |
|||
int d_sampleCnt_v); |
|||
|
|||
__global__ static void |
|||
g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, |
|||
const float *nTexture_v, |
|||
const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, |
|||
float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); |
|||
|
|||
__global__ static void |
|||
g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); |
|||
|
|||
class Surface { |
|||
private: |
|||
std::vector<std::vector<std::vector<float>>> controlPoints; |
|||
float *d_points; |
|||
std::vector<float> knots_u; |
|||
std::vector<float> knots_v; |
|||
float *d_knots_u; |
|||
float *d_knots_v; |
|||
bool recordTime; |
|||
|
|||
float *d_nTexture_u; // u方向指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture_v; // v方向指向度为p时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 |
|||
float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 |
|||
|
|||
float *d_derivatives; // 一阶导计算结果 |
|||
|
|||
public: |
|||
/** |
|||
* 构造函数 |
|||
* @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3] |
|||
* @param knots_u u方向knots |
|||
* @param knots_v v方向knots |
|||
*/ |
|||
__host__ explicit Surface(std::vector<std::vector<std::vector<float>>> controlPoints, |
|||
std::vector<float> knots_u, std::vector<float> knots_v); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算的方法 |
|||
* @param sampleCnt_u u方向采样数目 |
|||
* @param sampleCnt_v v方向采样数目 |
|||
* @return 由 map 组成的vector{<<u, v>, {x, y, z}>} |
|||
*/ |
|||
__host__ std::vector<std::vector<std::vector<float>>> |
|||
evaluate(int sampleCnt_u_, int sampleCnt_v_); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 |
|||
*/ |
|||
__host__ void derivative(int sampleCnt_u, int sampleCnt_v); |
|||
|
|||
/** |
|||
* 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 |
|||
*/ |
|||
__host__ void curvature(int sampleCnt_u, int sampleCnt_v); |
|||
|
|||
void setRecordTime(bool r); |
|||
|
|||
~Surface(); |
|||
|
|||
}; |
|||
} |
|||
|
|||
|
|||
|
|||
#endif //NURBSEVALUATOR_NURBSSURFACE_CUH |
|||
@ -0,0 +1,22 @@ |
|||
#ifndef UNTITLED1_UTILS_H |
|||
#define UNTITLED1_UTILS_H |
|||
|
|||
#define IN_UNIX 0 // 确定当前运行的操作系统(需要通过系统调用获得时间)
|
|||
|
|||
#if IN_UNIX |
|||
#include <sys/time.h> |
|||
#include <ctime> |
|||
double get_time(); |
|||
#else |
|||
#include <windows.h> |
|||
double get_time(); |
|||
#endif |
|||
|
|||
/**
|
|||
* 保证释放后的指针指向空。这样一来保证指针不乱指,free的时候不会出错、二来可以判断指针是否已经free |
|||
* 注意指针是引用传参,因为要把指针本身置空 |
|||
*/ |
|||
void safeCudaFree(float *&p); |
|||
void safeFree(float *&p); |
|||
|
|||
#endif //UNTITLED1_UTILS_H
|
|||
@ -0,0 +1,26 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
|
|||
#include "../../include/device/DeviceUtils.cuh" |
|||
|
|||
__device__ void d_safeFree(float *&p) { |
|||
if (p != nullptr) { |
|||
free(p); |
|||
p = nullptr; |
|||
} |
|||
} |
|||
|
|||
__device__ bool d_floatEqual(float a, float b) { |
|||
return abs(a - b) < 0.00001; |
|||
} |
|||
|
|||
__device__ void normalization(float &a, float &b, float &c) { |
|||
float sumA = a * a; |
|||
float sumB = b * b; |
|||
float sumC = c * c; |
|||
float sum = sumA + sumB + sumC; |
|||
a = sqrt(sumA / sum); |
|||
b = sqrt(sumB / sum); |
|||
c = sqrt(sumC / sum); |
|||
} |
|||
@ -0,0 +1,76 @@ |
|||
// |
|||
// Created by 14727 on 2022/11/19. |
|||
// |
|||
|
|||
#include "../../include/device/NurbsCommon.cuh" |
|||
#include "../../include/device/DeviceUtils.cuh" |
|||
|
|||
__device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { |
|||
int m = d_knotsCnt - 1; |
|||
for (int p = 0; p <= degree; p++) { |
|||
for (int i = 0; i + p <= m - 1; i++) { |
|||
if (p == 0) { |
|||
if ((u > knots[i] || d_floatEqual(u, knots[i])) && (u < knots[i + 1]) |
|||
|| |
|||
d_floatEqual(u, knots[i + 1]) && d_floatEqual(u, knots[m])) { |
|||
N_Texture[p * m + i] = 1.0; |
|||
} else { |
|||
N_Texture[p * m + i] = 0.0; |
|||
} |
|||
} else { |
|||
float Nip_1 = N_Texture[(p - 1) * m + i]; |
|||
float Ni1p_1 = N_Texture[(p - 1) * m + i + 1]; |
|||
float left = d_floatEqual(knots[i + p], knots[i]) ? 0 : (u - knots[i]) * Nip_1 / |
|||
(knots[i + p] - knots[i]); |
|||
float right = d_floatEqual(knots[i + p + 1], knots[i + 1]) ? 0 : (knots[i + p + 1] - u) * Ni1p_1 / |
|||
(knots[i + p + 1] - knots[i + 1]); |
|||
N_Texture[p * m + i] = left + right; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt) { |
|||
// 一维grid和一维block |
|||
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 |
|||
float d_paramCeil = d_knots[d_knotsCnt - 1]; |
|||
float u = idx * d_paramCeil / (d_sampleCnt - 1); |
|||
if (u > 1.0 * d_paramCeil) { |
|||
return; |
|||
} |
|||
int d_degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); |
|||
d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i]; |
|||
nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i]; |
|||
// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); |
|||
} |
|||
nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + |
|||
d_pointsCnt]; // nTexture1多记录一列数据 |
|||
d_safeFree(N_dp); |
|||
} |
|||
|
|||
__global__ void |
|||
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, |
|||
int d_sampleCnt) { |
|||
// 一维grid和一维block |
|||
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 采样点编号 |
|||
if (idx >= d_sampleCnt) { |
|||
return; |
|||
} |
|||
int degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
|
|||
// printf("degree: %d\n", degree); |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float left = d_floatEqual(d_knots[i + degree], d_knots[i]) ? 0 : |
|||
nTexture1[idx * (d_pointsCnt + 1) + i] * (degree - 1) / (d_knots[i + degree] - d_knots[i]); |
|||
float right = d_floatEqual(d_knots[i + degree + 1], d_knots[i + 1]) ? 0 : |
|||
nTexture1[idx * (d_pointsCnt + 1) + i + 1] * (degree - 1) / |
|||
(d_knots[i + degree + 1] - d_knots[i + 1]); |
|||
derTexture[idx * d_pointsCnt + i] = left - right; |
|||
// printf("<%d, %d> -- %g \n", idx, i, left - right); |
|||
// printf("nTex1: %g \n", nTexture1[idx * (d_pointsCnt + 1) + i]); |
|||
} |
|||
} |
|||
@ -0,0 +1,283 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
#include "../../include/device/NurbsCommon.cuh" |
|||
#include "../../include/device/NurbsCurve.cuh" |
|||
#include "../../include/utils.h" |
|||
|
|||
__global__ void |
|||
NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt, |
|||
const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { |
|||
// printf(" curve calculating... \n"); |
|||
// 二维grid和一维的block |
|||
// int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
float u = idx * d_lastKnot / (d_sampleCnt - 1); |
|||
if (u > 1.0 * d_lastKnot) { |
|||
return; |
|||
} |
|||
// |
|||
// int d_degree = d_knotsCnt - 1 - d_pointsCnt; |
|||
// // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree |
|||
// auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); |
|||
// d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); |
|||
float x = 0., y = 0., z = 0., sumW = 0.; |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float N = NTexture[idx * d_pointsCnt + i]; |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float w = d_points[baseIdx + 3]; |
|||
x += N * w * d_points[baseIdx]; |
|||
y += N * w * d_points[baseIdx + 1]; |
|||
z += N * w * d_points[baseIdx + 2]; |
|||
sumW += N * w; |
|||
} |
|||
x = x / sumW; |
|||
y = y / sumW; |
|||
z = z / sumW; |
|||
int baseIdx = idx * 3; // 这里的结果点大小应该固定为3 |
|||
res[baseIdx] = x; |
|||
res[baseIdx + 1] = y; |
|||
res[baseIdx + 2] = z; |
|||
printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, |
|||
int d_pointsCnt, int d_POINT_SIZE, |
|||
float d_lastKnot, int d_sampleCnt) { |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
if (idx >= d_sampleCnt) return; |
|||
float u = idx * d_lastKnot / (d_sampleCnt - 1); |
|||
float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; |
|||
|
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float nFactor = derTexture[idx * d_pointsCnt + i]; |
|||
float wi = d_points[baseIdx + 3]; |
|||
nubs_dx += nFactor * wi * d_points[baseIdx]; |
|||
nubs_dy += nFactor * wi * d_points[baseIdx + 1]; |
|||
nubs_dz += nFactor * wi * d_points[baseIdx + 2]; |
|||
nubs_dw += nFactor * wi; |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., w = 0.; |
|||
for (int i = 0; i < d_pointsCnt; i++) { |
|||
float N = nTexture[idx * d_pointsCnt + i]; |
|||
int baseIdx = i * d_POINT_SIZE; |
|||
float wi = d_points[baseIdx + 3]; |
|||
x += N * wi * d_points[baseIdx]; |
|||
y += N * wi * d_points[baseIdx + 1]; |
|||
z += N * wi * d_points[baseIdx + 2]; |
|||
w += N * wi; |
|||
} |
|||
float dx = (nubs_dx * w - x * nubs_dw) / (w * w); |
|||
float dy = (nubs_dy * w - y * nubs_dw) / (w * w); |
|||
float dz = (nubs_dz * w - z * nubs_dw) / (w * w); |
|||
int baseIdx = idx * 3; |
|||
derivatives[baseIdx] = dx; |
|||
derivatives[baseIdx + 1] = dy; |
|||
derivatives[baseIdx + 2] = dz; |
|||
printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); |
|||
} |
|||
|
|||
__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { |
|||
// 二维block和一维grid |
|||
int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
|||
if (idx >= sampleCnt) return; |
|||
float step = lastKnot / (sampleCnt - 1); |
|||
float u = idx * step; |
|||
float sndPdx, sndPdy, sndPdz; // 二阶导 |
|||
int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; |
|||
if (idx == 0) { |
|||
sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; |
|||
sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; |
|||
sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; |
|||
} else if (idx == sampleCnt - 1) { |
|||
sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; |
|||
sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; |
|||
sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; |
|||
} else { |
|||
sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); |
|||
sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); |
|||
sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); |
|||
} |
|||
printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); |
|||
} |
|||
|
|||
NurbsCurve::Curve::Curve(std::vector<std::vector<float>> controlPoints, |
|||
std::vector<float> knots) { |
|||
this->knots = std::move(knots); |
|||
this->controlPoints = std::move(controlPoints); |
|||
recordTime = false; |
|||
d_nTexture = nullptr; |
|||
d_nTexture1 = nullptr; |
|||
d_points = nullptr; |
|||
d_knots = nullptr; |
|||
d_derivatives = nullptr; |
|||
} |
|||
|
|||
void NurbsCurve::Curve::setRecordTime(bool r) { |
|||
recordTime = r; |
|||
} |
|||
|
|||
NurbsCurve::Curve::~Curve() { |
|||
safeCudaFree(d_nTexture); |
|||
safeCudaFree(d_nTexture1); |
|||
safeCudaFree(d_points); |
|||
safeCudaFree(d_knots); |
|||
cudaDeviceReset(); |
|||
} |
|||
|
|||
std::vector<std::vector<float>> |
|||
NurbsCurve::Curve::evaluate(int sampleCnt) { |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return {}; |
|||
} |
|||
// 构造指向device的controlPoints |
|||
const int pointsCnt = controlPoints.size(); |
|||
const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); |
|||
auto *h_points = (float *) malloc(pointsBytes); |
|||
for (int i = 0; i < pointsCnt; i++) { |
|||
for (int j = 0; j < POINT_SIZE; j++) { |
|||
h_points[i * POINT_SIZE + j] = controlPoints[i][j]; |
|||
} |
|||
} |
|||
safeCudaFree(d_points); // 注意内存管理 |
|||
cudaMalloc((void **) &d_points, pointsBytes); |
|||
cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造指向device的knots |
|||
const int knotsCnt = knots.size(); |
|||
const int knotsBytes = knotsCnt * sizeof(float); |
|||
auto *h_knots = (float *) malloc(knotsBytes); |
|||
for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; |
|||
safeCudaFree(d_knots); // 注意内存管理 |
|||
cudaMalloc((void **) &d_knots, knotsBytes); |
|||
cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 分配nTexture的内存。只需要GPU内存 |
|||
safeCudaFree(d_nTexture); // 注意内存管理 |
|||
cudaMalloc((void **) &d_nTexture, |
|||
sampleCnt * pointsCnt * sizeof(float)); |
|||
|
|||
// 分配nTexture1的内存。只需要GPU内存 |
|||
safeCudaFree(d_nTexture1); // 注意内存管理 |
|||
cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 |
|||
|
|||
// 结果数组 |
|||
size_t resBytes = sampleCnt * 3 * sizeof(float); |
|||
float *d_res; |
|||
cudaMalloc((void **)&d_res, resBytes); |
|||
auto *h_res = (float *)malloc(resBytes); |
|||
|
|||
|
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockBasis(512); |
|||
dim3 gridBasis((sampleCnt + blockBasis.x - 1) / blockBasis.x); |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = get_time(); |
|||
printf("there..\n"); |
|||
g_basisTexture <<<gridBasis, blockBasis>>>(d_nTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); |
|||
// cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 |
|||
cudaDeviceSynchronize(); |
|||
printf("here..\n"); |
|||
g_evaluate <<<grid, block>>>(d_res, d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); |
|||
// g_test<<<1,6>>>(d_nTextureCpy); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device); |
|||
} |
|||
|
|||
cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); |
|||
std::vector<std::vector<float>> res(sampleCnt, std::vector<float>(3, 0.)); |
|||
for(int i = 0; i < sampleCnt; i++) { |
|||
int baseIdx = i * 3; |
|||
res[i][0] = h_res[baseIdx]; |
|||
res[i][1] = h_res[baseIdx + 1]; |
|||
res[i][2] = h_res[baseIdx + 2]; |
|||
} |
|||
|
|||
safeFree(h_points); |
|||
safeFree(h_knots); |
|||
safeCudaFree(d_res); |
|||
safeFree(h_res); |
|||
return res; |
|||
} |
|||
|
|||
void NurbsCurve::Curve::derivative(int sampleCnt) { |
|||
// 先完成evaluation |
|||
evaluate(sampleCnt); |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
float *d_derTexture = nullptr; |
|||
const int pointsCnt = controlPoints.size(); |
|||
const int knotsCnt = knots.size(); |
|||
cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockTex(512); |
|||
dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); |
|||
|
|||
|
|||
// 构造切向量计算结果 |
|||
safeCudaFree(d_derivatives); |
|||
cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = get_time(); |
|||
g_derTexture<<<gridTex, blockTex>>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); |
|||
cudaDeviceSynchronize(); |
|||
g_derivative<<<grid, block>>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, |
|||
knots[knotsCnt - 1], sampleCnt); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of curve first derivative calculating for %d samples: %lf\n", sampleCnt, |
|||
time_cost_device); |
|||
} |
|||
cudaFree(d_derTexture); |
|||
} |
|||
|
|||
void NurbsCurve::Curve::curvature(int sampleCnt) { |
|||
// 先计算切向量 |
|||
derivative(sampleCnt); |
|||
|
|||
if (POINT_SIZE != controlPoints[0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = get_time(); |
|||
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, |
|||
time_cost_device); |
|||
} |
|||
} |
|||
|
|||
@ -0,0 +1,424 @@ |
|||
// |
|||
// Created by 14727 on 2022/12/9. |
|||
// |
|||
|
|||
#include "../../include/device/NurbsSurface.cuh" |
|||
#include "../../include/device/NurbsCommon.cuh" |
|||
#include "../../include/device/DeviceUtils.cuh" |
|||
#include "../../include/utils.h" |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, |
|||
int d_sampleCnt_v) { |
|||
|
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); |
|||
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); |
|||
|
|||
if (u > 1.0 * d_lastKnot_u || v > 1.0 * d_lastKnot_v) { |
|||
return; |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., sumW = 0.; |
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; |
|||
for (int j = 0; j < d_pointsCnt_v; j++) { |
|||
float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; |
|||
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float w = d_points[idx + 3]; |
|||
x += N_U * N_V * w * d_points[idx]; |
|||
y += N_U * N_V * w * d_points[idx + 1]; |
|||
z += N_U * N_V * w * d_points[idx + 2]; |
|||
sumW += N_U * N_V * w; |
|||
} |
|||
} |
|||
x = x / sumW; |
|||
y = y / sumW; |
|||
z = z / sumW; |
|||
|
|||
// int baseIdx = (ix * d_sampleCnt_v + iy) * 3; |
|||
// res[baseIdx] = x; |
|||
// res[baseIdx + 1] = y; |
|||
// res[baseIdx + 2] = z; |
|||
|
|||
// printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, x, y, z); // %g输出,舍弃无意义的0 |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, |
|||
const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, |
|||
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, |
|||
int d_sampleCnt_u, int d_sampleCnt_v) { |
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) { |
|||
return; |
|||
} |
|||
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); |
|||
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); |
|||
|
|||
float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; |
|||
float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; |
|||
|
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
for (int j = 0; j < d_pointsCnt_u; j++) { |
|||
int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; |
|||
float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; |
|||
float wij = d_points[baseIdx + 3]; |
|||
nubsPdx_u += factor_u * wij * d_points[baseIdx]; |
|||
nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; |
|||
nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; |
|||
nubsPdw_u += factor_u * wij; |
|||
|
|||
nubsPdx_v += factor_v * wij * d_points[baseIdx]; |
|||
nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; |
|||
nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; |
|||
nubsPdw_v += factor_v * wij; |
|||
} |
|||
} |
|||
|
|||
float x = 0., y = 0., z = 0., w = 0.; |
|||
for (int i = 0; i < d_pointsCnt_u; i++) { |
|||
float N_U = nTexture_u[ix * d_pointsCnt_u + i]; |
|||
for (int j = 0; j < d_pointsCnt_v; j++) { |
|||
float N_V = nTexture_v[iy * d_pointsCnt_v + j]; |
|||
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; |
|||
float wij = d_points[idx + 3]; |
|||
x += N_U * N_V * wij * d_points[idx]; |
|||
y += N_U * N_V * wij * d_points[idx + 1]; |
|||
z += N_U * N_V * wij * d_points[idx + 2]; |
|||
w += N_U * N_V * wij; |
|||
} |
|||
} |
|||
|
|||
float w2 = w * w; |
|||
float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; |
|||
float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; |
|||
float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; |
|||
|
|||
float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; |
|||
float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; |
|||
float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; |
|||
// float pdz_u = (nubsPdz_u * w - z ) |
|||
|
|||
int baseIdx = (ix * d_sampleCnt_v + iy) * 6; |
|||
derivatives[baseIdx] = pdx_u; |
|||
derivatives[baseIdx + 1] = pdy_u; |
|||
derivatives[baseIdx + 2] = pdz_u; |
|||
derivatives[baseIdx + 3] = pdx_v; |
|||
derivatives[baseIdx + 4] = pdy_v; |
|||
derivatives[baseIdx + 5] = pdz_v; |
|||
|
|||
float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = |
|||
pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 |
|||
if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || |
|||
(ix == 8 && iy == 10)) |
|||
printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, |
|||
pdz_v, x_n, y_n, z_n); |
|||
} |
|||
|
|||
__global__ void |
|||
NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, |
|||
float lastKnot_v) { |
|||
// 二维grid和二维的block |
|||
int ix = blockIdx.x * blockDim.x + threadIdx.x; |
|||
int iy = blockIdx.y * blockDim.y + threadIdx.y; |
|||
|
|||
if (ix >= sampleCnt_u || iy >= sampleCnt_v) { |
|||
return; |
|||
} |
|||
|
|||
|
|||
float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); |
|||
float u = ix * step_u, v = iy * step_v; |
|||
|
|||
int baseIdx = (ix * sampleCnt_v + iy) * 6; |
|||
int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; |
|||
int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; |
|||
|
|||
// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], |
|||
// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); |
|||
|
|||
float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导 |
|||
float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; |
|||
|
|||
if (ix == 0) { |
|||
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; |
|||
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; |
|||
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; |
|||
|
|||
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; |
|||
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; |
|||
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; |
|||
} else if (ix == sampleCnt_u - 1) { |
|||
sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; |
|||
sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; |
|||
sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; |
|||
|
|||
sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; |
|||
sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; |
|||
sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; |
|||
} else { |
|||
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); |
|||
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); |
|||
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); |
|||
|
|||
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); |
|||
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); |
|||
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); |
|||
} |
|||
|
|||
if (iy == 0) { |
|||
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; |
|||
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; |
|||
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; |
|||
|
|||
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; |
|||
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; |
|||
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; |
|||
} else if (iy == sampleCnt_v - 1) { |
|||
sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; |
|||
sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; |
|||
sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; |
|||
|
|||
sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; |
|||
sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; |
|||
sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; |
|||
} else { |
|||
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); |
|||
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); |
|||
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); |
|||
|
|||
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); |
|||
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); |
|||
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); |
|||
} |
|||
|
|||
float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; |
|||
normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); |
|||
normalization(uvx, uvy, uvz); |
|||
normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); |
|||
|
|||
|
|||
if (ix == 8 && iy == 9) |
|||
printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, |
|||
sndPdz_uu, |
|||
uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); |
|||
} |
|||
|
|||
|
|||
__host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<float>>> controlPoints, |
|||
std::vector<float> knots_u, std::vector<float> knots_v) { |
|||
this->knots_u = std::move(knots_u); |
|||
this->knots_v = std::move(knots_v); |
|||
this->controlPoints = std::move(controlPoints); |
|||
recordTime = false; |
|||
d_nTexture_u = nullptr; |
|||
d_nTexture_v = nullptr; |
|||
d_nTexture1_u = nullptr; |
|||
d_nTexture1_v = nullptr; |
|||
d_knots_u = nullptr; |
|||
d_knots_v = nullptr; |
|||
d_points = nullptr; |
|||
d_derivatives = nullptr; |
|||
} |
|||
|
|||
__host__ std::vector<std::vector<std::vector<float>>> |
|||
NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { |
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return {}; |
|||
} |
|||
|
|||
// 构造指向device的controlPoints |
|||
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); |
|||
const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); |
|||
auto *h_points = (float *) malloc(pointsBytes); |
|||
for (int i = 0; i < pointsCnt_u; i++) { |
|||
for (int j = 0; j < pointsCnt_v; j++) { |
|||
for (int k = 0; k < POINT_SIZE; k++) { |
|||
h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; |
|||
} |
|||
} |
|||
} |
|||
cudaMalloc((void **) &d_points, pointsBytes); |
|||
cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造指向device的knots |
|||
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); |
|||
const int knotsBytes_u = knotsCnt_u * sizeof(float), knotsBytes_v = knotsCnt_v * sizeof(float); |
|||
auto *h_knots_u = (float *) malloc(knotsBytes_u), *h_knots_v = (float *) malloc(knotsBytes_v); |
|||
for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i]; |
|||
for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i]; |
|||
|
|||
cudaMalloc((void **) &d_knots_u, knotsBytes_u); |
|||
cudaMalloc((void **) &d_knots_v, knotsBytes_v); |
|||
cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice); |
|||
cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice); |
|||
|
|||
// 构造nTexture |
|||
cudaMalloc((void **) &d_nTexture_u, |
|||
sampleCnt_u * pointsCnt_u * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 |
|||
cudaMalloc((void **) &d_nTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); |
|||
|
|||
// 构造nTexture1 |
|||
cudaMalloc((void **) &d_nTexture1_u, sampleCnt_u * (pointsCnt_u + 1) * sizeof(float)); |
|||
cudaMalloc((void **) &d_nTexture1_v, sampleCnt_v * (pointsCnt_v + 1) * sizeof(float)); |
|||
|
|||
// 结果数组 |
|||
size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float); |
|||
float *d_res; |
|||
cudaMalloc((void **)&d_res, resBytes); |
|||
auto *h_res = (float *)malloc(resBytes); |
|||
|
|||
|
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockBasis(512); |
|||
dim3 gridBasis_u((sampleCnt_u + blockBasis.x - 1) / blockBasis.x); |
|||
dim3 gridBasis_v((sampleCnt_v + blockBasis.x - 1) / blockBasis.x); |
|||
|
|||
// 构造线程层级,调用核函数 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
g_basisTexture<<<gridBasis_u, blockBasis>>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, |
|||
sampleCnt_u); |
|||
cudaDeviceSynchronize(); |
|||
g_basisTexture<<<gridBasis_v, blockBasis>>>(d_nTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); |
|||
|
|||
if (recordTime) time_cost_device = get_time(); |
|||
g_evaluate <<<grid, block>>>(d_res, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, |
|||
knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of surface evaluation for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
|
|||
cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); |
|||
std::vector<std::vector<std::vector<float>>> res(sampleCnt_u, std::vector<std::vector<float>>(sampleCnt_v, std::vector<float>(3, 0.))); |
|||
for(int i = 0; i < sampleCnt_u; i++) { |
|||
int baseIdx = i * sampleCnt_v * 3; |
|||
for(int j = 0; j < sampleCnt_v; j++) { |
|||
baseIdx += j * 3; |
|||
res[i][j][0] = h_res[baseIdx]; |
|||
res[i][j][1] = h_res[baseIdx + 1]; |
|||
res[i][j][2] = h_res[baseIdx + 2]; |
|||
// printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0], res[i][j][1], res[i][j][2]); |
|||
} |
|||
} |
|||
|
|||
// 释放内存 |
|||
safeFree(h_points); |
|||
safeFree(h_knots_u); |
|||
safeFree(h_knots_v); |
|||
safeCudaFree(d_res); |
|||
safeFree(h_res); |
|||
|
|||
return res; |
|||
} |
|||
|
|||
__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) { |
|||
// 先完成evaluation |
|||
evaluate(sampleCnt_u, sampleCnt_v); |
|||
|
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
float *d_derTexture_u = nullptr; |
|||
float *d_derTexture_v = nullptr; |
|||
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); |
|||
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); |
|||
cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); |
|||
cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); |
|||
|
|||
// 构造切向量计算结果 |
|||
safeCudaFree(d_derivatives); |
|||
cudaMalloc((void **) &d_derivatives, |
|||
sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
// 构造g_basisTexture线程层级 |
|||
dim3 blockTex(512); |
|||
dim3 gridTex_u((sampleCnt_u + blockTex.x - 1) / blockTex.x); |
|||
dim3 gridTex_v((sampleCnt_v + blockTex.x - 1) / blockTex.x); |
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = get_time(); |
|||
g_derTexture<<<gridTex_u, blockTex>>>(d_derTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, |
|||
sampleCnt_u); |
|||
g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); |
|||
g_derivative<<<grid, block>>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, |
|||
pointsCnt_u, |
|||
pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], |
|||
sampleCnt_u, |
|||
sampleCnt_v); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
cudaFree(d_derTexture_u); |
|||
cudaFree(d_derTexture_v); |
|||
} |
|||
|
|||
|
|||
|
|||
__host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) { |
|||
// 先计算切向量 |
|||
derivative(sampleCnt_u, sampleCnt_v); |
|||
if (POINT_SIZE != controlPoints[0][0].size()) { |
|||
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); |
|||
return; |
|||
} |
|||
|
|||
// 构造线程层级 |
|||
dim3 block(32, 32); |
|||
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); |
|||
|
|||
// 记录用时 |
|||
double time_cost_device; |
|||
if (recordTime) time_cost_device = get_time(); |
|||
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], |
|||
knots_v[knots_v.size() - 1]); |
|||
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 |
|||
if (recordTime) { |
|||
time_cost_device = get_time() - time_cost_device; |
|||
printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", |
|||
sampleCnt_u * sampleCnt_v, |
|||
time_cost_device); |
|||
} |
|||
} |
|||
|
|||
void NurbsSurface::Surface::setRecordTime(bool r) { |
|||
recordTime = r; |
|||
} |
|||
|
|||
NurbsSurface::Surface::~Surface() { |
|||
safeCudaFree(d_nTexture_u); |
|||
safeCudaFree(d_nTexture_v); |
|||
safeCudaFree(d_nTexture1_u); |
|||
safeCudaFree(d_nTexture1_v); |
|||
safeCudaFree(d_points); |
|||
safeCudaFree(d_knots_u); |
|||
safeCudaFree(d_knots_v); |
|||
cudaDeviceReset(); |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
#include "../include/utils.h" |
|||
#include "cuda_runtime.h" |
|||
|
|||
#if IN_UNIX |
|||
double get_time() { |
|||
struct timeval tv{}; |
|||
double t; |
|||
|
|||
gettimeofday(&tv, (struct timezone *) nullptr); |
|||
t = tv.tv_sec + (double) tv.tv_usec * 1e-6; |
|||
|
|||
return t; |
|||
} |
|||
#else |
|||
double get_time() { |
|||
LARGE_INTEGER timer; |
|||
static LARGE_INTEGER fre; |
|||
static int init = 0; |
|||
double t; |
|||
|
|||
if (init != 1) { |
|||
QueryPerformanceFrequency(&fre); |
|||
init = 1; |
|||
} |
|||
|
|||
QueryPerformanceCounter(&timer); |
|||
|
|||
t = timer.QuadPart * 1. / fre.QuadPart; |
|||
|
|||
return t; |
|||
} |
|||
#endif |
|||
|
|||
void safeCudaFree(float *&p) { |
|||
if (p != nullptr) { |
|||
cudaFree(p); |
|||
p = nullptr; |
|||
} |
|||
} |
|||
|
|||
void safeFree(float *&p) { |
|||
if (p != nullptr) { |
|||
free(p); |
|||
p = nullptr; |
|||
} |
|||
} |
|||
@ -1,19 +0,0 @@ |
|||
#include "utils.h" |
|||
|
|||
double utils::get_time_windows() { |
|||
LARGE_INTEGER timer; |
|||
static LARGE_INTEGER fre; |
|||
static int init = 0; |
|||
double t; |
|||
|
|||
if (init != 1) { |
|||
QueryPerformanceFrequency(&fre); |
|||
init = 1; |
|||
} |
|||
|
|||
QueryPerformanceCounter(&timer); |
|||
|
|||
t = timer.QuadPart * 1. / fre.QuadPart; |
|||
|
|||
return t; |
|||
} |
|||
@ -1,11 +0,0 @@ |
|||
#ifndef UNTITLED1_UTILS_H |
|||
#define UNTITLED1_UTILS_H |
|||
#include <windows.h> |
|||
|
|||
|
|||
namespace utils { |
|||
double get_time_windows(); |
|||
} |
|||
|
|||
|
|||
#endif //UNTITLED1_UTILS_H
|
|||
Loading…
Reference in new issue