diff --git a/NurbsEvaluator.cu b/NurbsEvaluator.cu index ce2cadf..da9c233 100644 --- a/NurbsEvaluator.cu +++ b/NurbsEvaluator.cu @@ -9,6 +9,16 @@ #include "utils.h" //#include "NurbsBasis.cuh" +__device__ void normalization(float &a, float &b, float &c) { + float sumA = a * a; + float sumB = b * b; + float sumC = c * c; + float sum = sumA + sumB + sumC; + a = sqrt(sumA / sum); + b = sqrt(sumB / sum); + c = sqrt(sumC / sum); +} + //extern __device__ void NurbsBasis::d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {}; __host__ NurbsSurface::Evaluator::Evaluator(std::vector>> controlPoints, @@ -24,23 +34,24 @@ __host__ NurbsSurface::Evaluator::Evaluator(std::vector, std::vector>> -NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { - sampleCnt_u = sampleCnt_u_; - sampleCnt_v = sampleCnt_v_; - printf("outside printf..\n"); -// NurbsBasis::myPrint11(1, 3); +NurbsSurface::Evaluator::evaluate(int sampleCnt_u, int sampleCnt_v) { + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return {}; + } // 构造指向device的controlPoints - const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(), pointSize = controlPoints[0][0].size(); - const int pointsBytes = pointsCnt_u * pointsCnt_v * pointSize * sizeof(float); + const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); + const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); auto *h_points = (float *) malloc(pointsBytes); for (int i = 0; i < pointsCnt_u; i++) { for (int j = 0; j < pointsCnt_v; j++) { - for (int k = 0; k < pointSize; k++) { - h_points[(i * pointsCnt_v + j) * pointSize + k] = controlPoints[i][j][k]; + for (int k = 0; k < POINT_SIZE; k++) { + h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; } } } @@ -86,7 +97,7 @@ NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { cudaDeviceSynchronize(); if (recordTime) time_cost_device = utils::get_time_windows(); - g_evaluate <<>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, pointSize, + g_evaluate <<>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { @@ -100,25 +111,27 @@ NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { free(h_knots_u); free(h_knots_v); printf("First derivatives and normal vectors calculated by GPU:\n"); - derivative(); - - cudaDeviceReset(); return {}; } __host__ std::vector>> -NurbsCurve::Evaluator::evaluate(int sampleCnt_) { - this->sampleCnt = sampleCnt_; +NurbsCurve::Evaluator::evaluate(int sampleCnt) { + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return {}; + } // 构造指向device的controlPoints - const int pointsCnt = controlPoints.size(), pointSize = controlPoints[0].size(); - const int pointsBytes = pointsCnt * pointSize * sizeof(float); + const int pointsCnt = controlPoints.size(); + const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); auto *h_points = (float *) malloc(pointsBytes); for (int i = 0; i < pointsCnt; i++) { - for (int j = 0; j < pointSize; j++) { - h_points[i * pointSize + j] = controlPoints[i][j]; + for (int j = 0; j < POINT_SIZE; j++) { + h_points[i * POINT_SIZE + j] = controlPoints[i][j]; } } + myCudaFree(d_points); // 注意内存管理 cudaMalloc((void **) &d_points, pointsBytes); cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); @@ -127,16 +140,19 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { const int knotsBytes = knotsCnt * sizeof(float); auto *h_knots = (float *) malloc(knotsBytes); for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; + myCudaFree(d_knots); // 注意内存管理 cudaMalloc((void **) &d_knots, knotsBytes); cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); // 分配nTexture的内存。只需要GPU内存 // float *d_nTexture = nullptr; + myCudaFree(d_nTexture); // 注意内存管理 cudaMalloc((void **) &d_nTexture, sampleCnt * pointsCnt * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 // 分配nTexture1的内存。只需要GPU内存 // float *d_nTexture1 = nullptr; + myCudaFree(d_nTexture1); // 注意内存管理 cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); // 构造g_basisTexture线程层级 @@ -154,7 +170,7 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { // cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 cudaDeviceSynchronize(); printf("here..\n"); - g_evaluate <<>>(d_nTexture, d_points, pointsCnt, pointSize, knots[knotsCnt - 1], sampleCnt); + g_evaluate <<>>(d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); // g_test<<<1,6>>>(d_nTextureCpy); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { @@ -163,22 +179,30 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { } free(h_points); free(h_knots); - - printf("First derivatives calculated by GPU:\n"); - derivative(); - - cudaDeviceReset(); return {}; } -__host__ void NurbsSurface::Evaluator::derivative() { +__host__ void NurbsSurface::Evaluator::derivative(int sampleCnt_u, int sampleCnt_v) { + // 先完成evaluation + evaluate(sampleCnt_u, sampleCnt_v); + + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + float *d_derTexture_u = nullptr; float *d_derTexture_v = nullptr; - const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(), pointSize = controlPoints[0][0].size(); + const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); + // 构造切向量计算结果 + myCudaFree(d_derivatives); + cudaMalloc((void **) &d_derivatives, + sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 + // 构造线程层级 dim3 block(32, 32); dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); @@ -194,8 +218,10 @@ __host__ void NurbsSurface::Evaluator::derivative() { g_derTexture<<>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, sampleCnt_v); cudaDeviceSynchronize(); - g_derivative<<>>(d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, - pointsCnt_v, pointSize, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, + g_derivative<<>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, + pointsCnt_u, + pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], + sampleCnt_u, sampleCnt_v); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { @@ -207,9 +233,17 @@ __host__ void NurbsSurface::Evaluator::derivative() { cudaFree(d_derTexture_v); } -__host__ void NurbsCurve::Evaluator::derivative() { +__host__ void NurbsCurve::Evaluator::derivative(int sampleCnt) { + // 先完成evaluation + evaluate(sampleCnt); + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + float *d_derTexture = nullptr; - const int pointsCnt = controlPoints.size(), pointSize = controlPoints[0].size(); + const int pointsCnt = controlPoints.size(); const int knotsCnt = knots.size(); cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); @@ -219,12 +253,19 @@ __host__ void NurbsCurve::Evaluator::derivative() { // 构造g_basisTexture线程层级 dim3 blockTex(512); dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); + + + // 构造切向量计算结果 + myCudaFree(d_derivatives); + cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 + // 记录用时 double time_cost_device; if (recordTime) time_cost_device = utils::get_time_windows(); g_derTexture<<>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); cudaDeviceSynchronize(); - g_derivative<<>>(d_derTexture, d_points, pointsCnt, pointSize, knots[knotsCnt - 1], sampleCnt); + g_derivative<<>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, + knots[knotsCnt - 1], sampleCnt); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { time_cost_device = utils::get_time_windows() - time_cost_device; @@ -234,11 +275,62 @@ __host__ void NurbsCurve::Evaluator::derivative() { cudaFree(d_derTexture); } +__host__ void NurbsSurface::Evaluator::curvature(int sampleCnt_u, int sampleCnt_v) { + // 先计算切向量 + derivative(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = utils::get_time_windows(); + g_curvature<<>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1]); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = utils::get_time_windows() - time_cost_device; + printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", + sampleCnt_u * sampleCnt_v, + time_cost_device); + } +} + +__host__ void NurbsCurve::Evaluator::curvature(int sampleCnt) { + // 先计算切向量 + derivative(sampleCnt); + + if (POINT_SIZE != controlPoints[0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); + + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = utils::get_time_windows(); + g_curvature<<>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); + cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 + if (recordTime) { + time_cost_device = utils::get_time_windows() - time_cost_device; + printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, + time_cost_device); + } +} + //__global__ void //NurbsSurface::g_evaluate(const float *d_points, const float *d_knots_u, const float *d_knots_v, // int d_pointsCnt_u, -// int d_pointsCnt_v, int d_pointSize, int d_knotsCnt_u, int d_knotsCnt_v, +// int d_pointsCnt_v, int d_POINT_SIZE, int d_knotsCnt_u, int d_knotsCnt_v, // int d_sampleCnt_u, int d_sampleCnt_v) { //// printf(" surface calculating... \n"); // // 二维grid和二维的block @@ -267,7 +359,7 @@ __host__ void NurbsCurve::Evaluator::derivative() { // for (int j = 0; j < d_pointsCnt_v; j++) { // float N_U = N_Texture_U[d_degree_u * (d_knotsCnt_u - 1) + i]; // float N_V = N_Texture_V[d_degree_v * (d_knotsCnt_v - 1) + j]; -// int idx = (i * d_pointsCnt_v + j) * d_pointSize; +// int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; // x += N_U * N_V * d_points[idx]; // y += N_U * N_V * d_points[idx + 1]; // z += N_U * N_V * d_points[idx + 2]; @@ -280,7 +372,7 @@ __host__ void NurbsCurve::Evaluator::derivative() { __global__ void NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, - int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, + int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v) { // 二维grid和二维的block @@ -294,25 +386,30 @@ NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, c return; } - float x = 0., y = 0., z = 0.; + float x = 0., y = 0., z = 0., sumW = 0.; for (int i = 0; i < d_pointsCnt_u; i++) { float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; for (int j = 0; j < d_pointsCnt_v; j++) { float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; - int idx = (i * d_pointsCnt_v + j) * d_pointSize; - x += N_U * N_V * d_points[idx]; - y += N_U * N_V * d_points[idx + 1]; - z += N_U * N_V * d_points[idx + 2]; + int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; + float w = d_points[idx + 3]; + x += N_U * N_V * w * d_points[idx]; + y += N_U * N_V * w * d_points[idx + 1]; + z += N_U * N_V * w * d_points[idx + 2]; + sumW += N_U * N_V * w; } } - printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 + x = x / sumW; + y = y / sumW; + z = z / sumW; +// printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 } __global__ void -NurbsSurface::g_derivative(const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, - const float *nTexture_v, const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, - int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, - int d_sampleCnt_v) { +NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, + const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, + int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, + int d_sampleCnt_u, int d_sampleCnt_v) { // 二维grid和二维的block int ix = blockIdx.x * blockDim.x + threadIdx.x; int iy = blockIdx.y * blockDim.y + threadIdx.y; @@ -323,31 +420,157 @@ NurbsSurface::g_derivative(const float *derTexture_u, const float *derTexture_v, float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); - float x_u = 0., y_u = 0, z_u = 0.; - float x_v = 0., y_v = 0, z_v = 0.; + float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; + float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; for (int i = 0; i < d_pointsCnt_u; i++) { for (int j = 0; j < d_pointsCnt_u; j++) { - int baseIdx = (i * d_pointsCnt_v + j) * d_pointSize; + int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; - x_u += factor_u * d_points[baseIdx]; - y_u += factor_u * d_points[baseIdx + 1]; - z_u += factor_u * d_points[baseIdx + 2]; + float wij = d_points[baseIdx + 3]; + nubsPdx_u += factor_u * wij * d_points[baseIdx]; + nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; + nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; + nubsPdw_u += factor_u * wij; + + nubsPdx_v += factor_v * wij * d_points[baseIdx]; + nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; + nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; + nubsPdw_v += factor_v * wij; + } + } - x_v += factor_v * d_points[baseIdx]; - y_v += factor_v * d_points[baseIdx + 1]; - z_v += factor_v * d_points[baseIdx + 2]; + float x = 0., y = 0., z = 0., w = 0.; + for (int i = 0; i < d_pointsCnt_u; i++) { + float N_U = nTexture_u[ix * d_pointsCnt_u + i]; + for (int j = 0; j < d_pointsCnt_v; j++) { + float N_V = nTexture_v[iy * d_pointsCnt_v + j]; + int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; + float wij = d_points[idx + 3]; + x += N_U * N_V * wij * d_points[idx]; + y += N_U * N_V * wij * d_points[idx + 1]; + z += N_U * N_V * wij * d_points[idx + 2]; + w += N_U * N_V * wij; } } - float x_n = y_u * z_v - y_v * z_u, y_n = x_v * z_u - x_u * z_v, z_n = x_u * y_v - x_v * y_u; // 叉乘得到法向量 - printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, x_u, y_u, z_u, x_v, y_v, z_v, - x_n, y_n, z_n); + + float w2 = w * w; + float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; + float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; + float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; + + float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; + float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; + float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; +// float pdz_u = (nubsPdz_u * w - z ) + + int baseIdx = (ix * d_sampleCnt_v + iy) * 6; + derivatives[baseIdx] = pdx_u; + derivatives[baseIdx + 1] = pdy_u; + derivatives[baseIdx + 2] = pdz_u; + derivatives[baseIdx + 3] = pdx_v; + derivatives[baseIdx + 4] = pdy_v; + derivatives[baseIdx + 5] = pdz_v; + + float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = + pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 + if((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || (ix == 8 && iy == 10)) + printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, + pdz_v, x_n, y_n, z_n); +} + +__global__ void +NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, + float lastKnot_v) { + // 二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + if (ix >= sampleCnt_u || iy >= sampleCnt_v) { + return; + } + + + float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); + float u = ix * step_u, v = iy * step_v; + + int baseIdx = (ix * sampleCnt_v + iy) * 6; + int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; + int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; + +// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], +// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); + + float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导 + float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; + + if (ix == 0) { + sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; + sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; + sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; + + sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; + sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; + sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; + } else if (ix == sampleCnt_u - 1) { + sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; + sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; + sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; + + sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; + sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; + sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; + } else { + sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); + sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); + sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); + + sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); + sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); + sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); + } + + if (iy == 0) { + sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; + sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; + sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; + + sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; + sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; + sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; + } else if (iy == sampleCnt_v - 1) { + sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; + sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; + sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; + + sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; + sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; + sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; + } else { + sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); + sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); + sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); + + sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); + sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); + sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); + } + + float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; + normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); + normalization(uvx, uvy, uvz); + normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); + + + if(ix == 8 && iy == 9) + printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, sndPdz_uu, + uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); } __global__ void NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d_pointsCnt, - const int d_pointSize, const float d_lastKnot, const int d_sampleCnt) { + const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { // printf(" curve calculating... \n"); // 二维grid和一维的block // int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; @@ -362,35 +585,86 @@ NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d // // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree // auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); // d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); - float x = 0., y = 0., z = 0.; + float x = 0., y = 0., z = 0., sumW = 0.; for (int i = 0; i < d_pointsCnt; i++) { float N = NTexture[idx * d_pointsCnt + i]; - int baseIdx = i * d_pointSize; - x += N * d_points[baseIdx]; - y += N * d_points[baseIdx + 1]; - z += N * d_points[baseIdx + 2]; + int baseIdx = i * d_POINT_SIZE; + float w = d_points[baseIdx + 3]; + x += N * w * d_points[baseIdx]; + y += N * w * d_points[baseIdx + 1]; + z += N * w * d_points[baseIdx + 2]; + sumW += N * w; } + x = x / sumW; + y = y / sumW; + z = z / sumW; printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 } __global__ void -NurbsCurve::g_derivative(const float *derTexture, const float *d_points, int d_pointsCnt, int d_pointSize, +NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, + int d_pointsCnt, int d_POINT_SIZE, float d_lastKnot, int d_sampleCnt) { // 二维block和一维grid int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; if (idx >= d_sampleCnt) return; float u = idx * d_lastKnot / (d_sampleCnt - 1); - float x = 0., y = 0, z = 0.; -// printf("pointSize: %d\n", d_pointSize); + float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; +// printf("POINT_SIZE: %d\n", d_POINT_SIZE); + for (int i = 0; i < d_pointsCnt; i++) { - int baseIdx = i * d_pointSize; + int baseIdx = i * d_POINT_SIZE; float nFactor = derTexture[idx * d_pointsCnt + i]; - x += nFactor * d_points[baseIdx]; - y += nFactor * d_points[baseIdx + 1]; - z += nFactor * d_points[baseIdx + 2]; + float wi = d_points[baseIdx + 3]; + nubs_dx += nFactor * wi * d_points[baseIdx]; + nubs_dy += nFactor * wi * d_points[baseIdx + 1]; + nubs_dz += nFactor * wi * d_points[baseIdx + 2]; + nubs_dw += nFactor * wi; // printf("(x, y, z): (%g, %g, %g)\n", d_points[baseIdx], d_points[baseIdx + 1], d_points[baseIdx + 2]); } - printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); + + float x = 0., y = 0., z = 0., w = 0.; + for (int i = 0; i < d_pointsCnt; i++) { + float N = nTexture[idx * d_pointsCnt + i]; + int baseIdx = i * d_POINT_SIZE; + float wi = d_points[baseIdx + 3]; + x += N * wi * d_points[baseIdx]; + y += N * wi * d_points[baseIdx + 1]; + z += N * wi * d_points[baseIdx + 2]; + w += N * wi; + } + float dx = (nubs_dx * w - x * nubs_dw) / (w * w); + float dy = (nubs_dy * w - y * nubs_dw) / (w * w); + float dz = (nubs_dz * w - z * nubs_dw) / (w * w); + int baseIdx = idx * 3; + derivatives[baseIdx] = dx; + derivatives[baseIdx + 1] = dy; + derivatives[baseIdx + 2] = dz; + printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); +} + +__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { + // 二维block和一维grid + int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + if (idx >= sampleCnt) return; + float step = lastKnot / (sampleCnt - 1); + float u = idx * step; + float sndPdx, sndPdy, sndPdz; // 二阶导 + int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; + if (idx == 0) { + sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; + sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; + sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; + } else if (idx == sampleCnt - 1) { + sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; + sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; + sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; + } else { + sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); + sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); + sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); + } + printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); } __global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, @@ -446,10 +720,9 @@ __host__ NurbsCurve::Evaluator::Evaluator(std::vector> contro recordTime = false; d_nTexture = nullptr; d_nTexture1 = nullptr; - - sampleCnt = 0; d_points = nullptr; d_knots = nullptr; + d_derivatives = nullptr; } @@ -482,6 +755,13 @@ __device__ bool d_floatEqual(float a, float b) { return abs(a - b) < 0.00001; } +__host__ void myCudaFree(float *&p) { + if (p != nullptr) { + cudaFree(p); + p = nullptr; + } +} + void NurbsCurve::Evaluator::setRecordTime(bool r) { recordTime = r; @@ -492,18 +772,20 @@ void NurbsSurface::Evaluator::setRecordTime(bool r) { } NurbsSurface::Evaluator::~Evaluator() { - cudaFree(d_nTexture_u); - cudaFree(d_nTexture_v); - cudaFree(d_nTexture1_u); - cudaFree(d_nTexture1_v); - cudaFree(d_points); - cudaFree(d_knots_u); - cudaFree(d_knots_v); + myCudaFree(d_nTexture_u); + myCudaFree(d_nTexture_v); + myCudaFree(d_nTexture1_u); + myCudaFree(d_nTexture1_v); + myCudaFree(d_points); + myCudaFree(d_knots_u); + myCudaFree(d_knots_v); + cudaDeviceReset(); } NurbsCurve::Evaluator::~Evaluator() { - cudaFree(d_nTexture); - cudaFree(d_nTexture1); - cudaFree(d_points); - cudaFree(d_knots); -} + myCudaFree(d_nTexture); + myCudaFree(d_nTexture1); + myCudaFree(d_points); + myCudaFree(d_knots); + cudaDeviceReset(); +} \ No newline at end of file diff --git a/NurbsEvaluator.cuh b/NurbsEvaluator.cuh index 9aac536..3a0ca94 100644 --- a/NurbsEvaluator.cuh +++ b/NurbsEvaluator.cuh @@ -6,6 +6,14 @@ #include #include +const int POINT_SIZE = 4; + +/** + * 保证释放后的指针指向空。这样一来保证指针不乱指,free的时候不会出错、二来可以判断指针是否已经free + * 注意指针是引用传参,因为要把指针本身置空 + */ +__host__ void myCudaFree(float *&p); + namespace NurbsSurface { /** * 曲线计算的核函数 @@ -17,10 +25,14 @@ namespace NurbsSurface { int d_sampleCnt_v); __global__ static void - g_derivative(const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, const float *nTexture_v, + g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, + const float *nTexture_v, const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); + __global__ static void + g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); + class Evaluator { private: std::vector>> controlPoints; @@ -36,8 +48,10 @@ namespace NurbsSurface { float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 - int sampleCnt_u; - int sampleCnt_v; + float *d_derivatives; // 一阶导计算结果 + +// int sampleCnt_u; +// int sampleCnt_v; public: /** @@ -61,7 +75,12 @@ namespace NurbsSurface { /** * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 */ - __host__ void derivative(); + __host__ void derivative(int sampleCnt_u, int sampleCnt_v); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 + */ + __host__ void curvature(int sampleCnt_u, int sampleCnt_v); void setRecordTime(bool r); @@ -86,9 +105,11 @@ namespace NurbsCurve { float d_lastKnot, int d_sampleCnt); __global__ static void - g_derivative(const float *derTexture, const float *d_points, int d_pointsCnt, int d_pointSize, float d_lastKnot, + g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, + int d_pointsCnt, int d_pointSize, float d_lastKnot, int d_sampleCnt); + __global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot); class Evaluator { private: @@ -101,7 +122,8 @@ namespace NurbsCurve { float *d_nTexture; // 指向度为p时的device中的nurbs基函数矩阵 float *d_nTexture1; // 指向度为p-1时的device中的nurbs基函数矩阵 - int sampleCnt; + float *d_derivatives{}; // 一阶导计算结果 + public: /** * 构造函数 @@ -119,7 +141,12 @@ namespace NurbsCurve { /** * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 */ - __host__ void derivative(); + __host__ void derivative(int sampleCnt); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算二阶导的方法 + */ + __host__ void curvature(int sampleCnt); __host__ ~Evaluator(); diff --git a/main.cpp b/main.cpp index efece4a..770b91c 100644 --- a/main.cpp +++ b/main.cpp @@ -3,31 +3,41 @@ int main() { NurbsSurface::Evaluator nurbsSurfaceEvaluator({ - {{-1, 0, 0}, {0, 1, 6}, {1, 0, 4}, {2, 0.5, 3}, {3, 3, 1}, {4, -5, 0}}, - {{-2, 1, 1.2}, {1, 2, 3}, {2, 2, 3}, {-1, -0.3, 2}, {-1, 2, 0}, {7, -8, 2}}, - {{-3.4, 2, 3}, {2, 3, 0}, {4, 3, 7}, {-2, 0, -0.2}, {1, 1.7, 5}, {9, -10.3, 6}}, - {{-1.5, 3.2, 1}, {2.6, 7, -2}, {5, 0.8, 4.2}, {-4, 1, 4}, {2.1, 4, -2}, {11, -6, 4}}, - {{-0.2, 2, 0}, {5, 3, 2}, {5, 1.5, 1.4}, {-3, 2, 5}, {0.8, 1.3, 0}, {15, -2, 0.9}}, - {{3, 1.4, -1}, {6, 2, 4}, {-1, 0, -2}, {0, 2.8, 2}, {-0.5, 2, 1.2}, {7, -3, -2}},}, + {{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}}, + {{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}}, + {{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}}, + {{-1.5, 3.2, 1, 0.5}, {2.6, 7, -2, 0.7}, {5, 0.8, 4.2, 0.8}, {-4, 1, 4, 0.7}, {2.1, 4, -2, 0.3}, {11, -6, 4, 0.6}}, + {{-0.2, 2, 0, 0.7}, {5, 3, 2, 0.4}, {5, 1.5, 1.4, 0.6}, {-3, 2, 5, 0.8}, {0.8, 1.3, 0, 0.5}, {15, -2, 0.9, 0.6}}, + {{3, 1.4, -1, 0.4}, {6, 2, 4, 0.6}, {-1, 0, -2, 0.4}, {0, 2.8, 2, 0.6}, {-0.5, 2, 1.2, 0.9}, {7, -3, -2, 0.3}},}, {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); +// NurbsSurface::Evaluator nurbsSurfaceEvaluator({ +// {{-1, 0, 0, 1}, {0, 1, 6, 1}, {1, 0, 4, 1}, {2, 0.5, 3, 1}, {3, 3, 1, 1}, {4, -5, 0, 1}}, +// {{-2, 1, 1.2, 1}, {1, 2, 3, 1}, {2, 2, 3, 1}, {-1, -0.3, 2, 1}, {-1, 2, 0, 1}, {7, -8, 2, 1}}, +// {{-3.4, 2, 3, 1}, {2, 3, 0, 1}, {4, 3, 7, 1}, {-2, 0, -0.2, 1}, {1, 1.7, 5, 1}, {9, -10.3, 6, 1}}, +// {{-1.5, 3.2, 1, 1}, {2.6, 7, -2, 1}, {5, 0.8, 4.2, 1}, {-4, 1, 4, 1}, {2.1, 4, -2, 1}, {11, -6, 4, 1}}, +// {{-0.2, 2, 0, 1}, {5, 3, 2, 1}, {5, 1.5, 1.4, 1}, {-3, 2, 5, 1}, {0.8, 1.3, 0, 1}, {15, -2, 0.9, 1}}, +// {{3, 1.4, -1, 1}, {6, 2, 4, 1}, {-1, 0, -2, 1}, {0, 2.8, 2, 1}, {-0.5, 2, 1.2, 1}, {7, -3, -2, 1}},}, +// {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, +// {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); nurbsSurfaceEvaluator.setRecordTime(true); - nurbsSurfaceEvaluator.evaluate(3, 4); + nurbsSurfaceEvaluator.curvature(10001, 10001); printf("==============================\n"); NurbsCurve::Evaluator nurbsCurveEvaluator( - {{-1, 0, 0}, - {0, 1, 6}, - {1, 0, 4}, - {2, 0.5, 3}, - {3, 3, 1}, - {4, -5, 0}}, + {{-1, 0, 0, 0.3}, + {0, 1, 6, 0.4}, + {1, 0, 4, 0.5}, + {2, 0.5, 3, 0.4}, + {3, 3, 1, 0.5}, + {4, -5, 0, 0.7}}, {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}); nurbsCurveEvaluator.setRecordTime(true); - nurbsCurveEvaluator.evaluate(11); + nurbsCurveEvaluator.curvature(11); printf("\n"); // nurbsCurveEvaluator.derivative(); return 0; -} \ No newline at end of file +} +