#ifndef UNTITLED1_NURBSEVALUATOR_CUH
#define UNTITLED1_NURBSEVALUATOR_CUH

#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <vector>
#include <map>

// Compile-time constant shared by the evaluators.
// NOTE(review): presumably the number of floats stored per control point
// ([x, y, z, w]) - confirm against the definitions that use it.
constexpr int POINT_SIZE = 4;

/**
 * Host-side helper for releasing a device buffer (declaration only; the
 * definition is not part of this header).
 *
 * According to the original author's note, the pointer is guaranteed to be
 * null after it has been freed. That keeps the pointer from dangling, makes a
 * later free harmless, and lets callers test whether a buffer was already
 * released.
 *
 * @param p pointer to release; taken by reference because the pointer itself
 *          has to be reset to null
 */
__host__ void myCudaFree(float *&p);

namespace NurbsSurface {
    /**
     * Surface evaluation kernel (declaration only; the definition is not in this header).
     * @param d_pointSize size of one point (3: [x, y, z] | 4: [x, y, z, w])
     */
    __global__ static void
    g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u,
               int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
               int d_sampleCnt_v);

    /**
     * Surface first-derivative kernel (declaration only).
     * @param derivatives output buffer for the derivative results
     */
    __global__ static void
    g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u,
                 const float *nTexture_v,
                 const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u,
                 float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v);

    /**
     * Surface curvature kernel (declaration only); reads previously computed derivatives.
     */
    __global__ static void
    g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v);

    /**
     * Host-side front end for evaluating a NURBS surface on the GPU.
     *
     * Every raw device pointer starts out as nullptr and recordTime as false, so
     * a buffer that was never allocated is distinguishable from a live one and
     * no member is ever read while indeterminate.
     *
     * NOTE(review): the class holds raw device pointers and declares a
     * destructor, but copying is not disabled. If the destructor releases these
     * buffers, copying an Evaluator would release them twice - confirm and
     * consider deleting the copy operations.
     */
    class Evaluator {
    private:
        std::vector<std::vector<std::vector<float>>> controlPoints;
        float *d_points = nullptr;
        std::vector<float> knots_u;
        std::vector<float> knots_v;
        float *d_knots_u = nullptr;
        float *d_knots_v = nullptr;
        bool recordTime = false;

        float *d_nTexture_u = nullptr;    // device NURBS basis-function matrix of degree p, u direction
        float *d_nTexture_v = nullptr;    // device NURBS basis-function matrix of degree p, v direction
        float *d_nTexture1_u = nullptr;   // device NURBS basis-function matrix of degree p-1, u direction
        float *d_nTexture1_v = nullptr;   // device NURBS basis-function matrix of degree p-1, v direction

        float *d_derivatives = nullptr;   // first-derivative results

//        int sampleCnt_u;
//        int sampleCnt_v;

    public:
        /**
         * Constructor.
         * @param controlPoints control-point matrix [pointsCnt_u][pointsCnt_v][3]
         * @param knots_u knot vector in the u direction
         * @param knots_v knot vector in the v direction
         */
        __host__ explicit Evaluator(std::vector<std::vector<std::vector<float>>> controlPoints,
                                    std::vector<float> knots_u, std::vector<float> knots_v);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU evaluation.
         * @param sampleCnt_u_ number of samples in the u direction
         * @param sampleCnt_v_ number of samples in the v direction
         * @return vector of maps {<<u, v>, {x, y, z}>}
         */
        __host__ std::vector<std::map<std::pair<float, float>, std::vector<float>>>
        evaluate(int sampleCnt_u_, int sampleCnt_v_);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU
         * computation of tangent vectors.
         * @param sampleCnt_u number of samples in the u direction
         * @param sampleCnt_v number of samples in the v direction
         */
        __host__ void derivative(int sampleCnt_u, int sampleCnt_v);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU
         * computation of second derivatives.
         * @param sampleCnt_u number of samples in the u direction
         * @param sampleCnt_v number of samples in the v direction
         */
        __host__ void curvature(int sampleCnt_u, int sampleCnt_v);

        /** Sets the recordTime flag. */
        void setRecordTime(bool r);

        ~Evaluator();

    };
}

/**
 * Curve part.
 */
namespace NurbsCurve {

    // NOTE(review): undocumented kernel; judging by the name it is a test/debug
    // helper - its definition is not in this header.
    __global__ void g_test(float *nTexture);

    /**
     * Curve evaluation kernel (declaration only; the definition is not in this header).
     * @param d_pointSize size of one point (3: [x, y, z] | 4: [x, y, z, w])
     */
    __global__ static void
    g_evaluate(const float *NTexture, const float *d_points, int d_pointsCnt, int d_pointSize,
               float d_lastKnot, int d_sampleCnt);

    /**
     * Curve first-derivative kernel (declaration only).
     * @param derivatives output buffer for the derivative results
     */
    __global__ static void
    g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points,
                 int d_pointsCnt, int d_pointSize, float d_lastKnot,
                 int d_sampleCnt);

    /**
     * Curve curvature kernel (declaration only); reads previously computed derivatives.
     */
    __global__ static void g_curvature(const float *derivatives, int sampleCnt, float lastKnot);

    /**
     * Host-side front end for evaluating a NURBS curve on the GPU.
     *
     * Every raw device pointer starts out as nullptr and recordTime as false, so
     * a buffer that was never allocated is distinguishable from a live one and
     * no member is ever read while indeterminate.
     *
     * NOTE(review): the class holds raw device pointers and declares a
     * destructor, but copying is not disabled. If the destructor releases these
     * buffers, copying an Evaluator would release them twice - confirm and
     * consider deleting the copy operations.
     */
    class Evaluator {
    private:
        std::vector<std::vector<float>> controlPoints;
        std::vector<float> knots;
        float *d_knots = nullptr;
        float *d_points = nullptr;
        bool recordTime = false;

        float *d_nTexture = nullptr;    // device NURBS basis-function matrix of degree p
        float *d_nTexture1 = nullptr;   // device NURBS basis-function matrix of degree p-1

        float *d_derivatives = nullptr;  // first-derivative results

    public:
        /**
         * Constructor.
         * @param controlPoints control-point matrix [pointsCnt][3]
         * @param knots knot vector of the curve
         */
        __host__ explicit Evaluator(std::vector<std::vector<float>> controlPoints, std::vector<float> knots);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU evaluation.
         * @param sampleCnt_ number of uniform samples taken over the parameter domain
         * @return vector of maps {<u, {x, y, z}>}
         */
        __host__ std::vector<std::map<float, std::vector<float>>> evaluate(int sampleCnt_);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU
         * computation of tangent vectors.
         * @param sampleCnt number of samples
         */
        __host__ void derivative(int sampleCnt);

        /**
         * Entry point for external CPU code; responsible for invoking the parallel GPU
         * computation of second derivatives.
         * @param sampleCnt number of samples
         */
        __host__ void curvature(int sampleCnt);

        __host__ ~Evaluator();

        /** Sets the recordTime flag. */
        void setRecordTime(bool r);
    };
}


/**
 * Kernel that computes and stores basis-function values (declaration only;
 * the definition is not in this header).
 * @param nTexture receives the basis-function values of degree p; per the original note its extent is
 *                 [sampleCnt, pointsCnt]
 * @param nTexture1 receives the basis-function values of degree p-1; per the original note its extent is
 *                  [sampleCnt+1, pointsCnt]
 * @param d_knots knot vector (the d_ prefix suggests device memory - TODO confirm)
 * @param d_pointsCnt number of control points
 * @param d_knotsCnt number of knots
 * @param d_sampleCnt number of samples
 */
__global__ static void
g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt,
               int d_sampleCnt);

/**
 * Kernel that computes and stores each basis function's contribution to the
 * tangent vector at the sample points (declaration only; the definition is not
 * in this header).
 * @param derTexture receives the contribution of the degree-p NURBS basis functions to the tangent vector at
 *                   each sample; per the original note its extent is [sampleCnt, pointsCnt]
 * @param nTexture1 basis-function values of degree p-1; per the original note its extent is
 *                  [sampleCnt+1, pointsCnt]
 * @param d_knots knot vector (the d_ prefix suggests device memory - TODO confirm)
 * @param d_pointsCnt number of control points
 * @param d_knotsCnt number of knots
 * @param d_sampleCnt number of samples
 */
__global__ static void
g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt,
             int d_sampleCnt);

/**
 * Device function (declaration only). Per the original note: for a known u,
 * the basis-function values N are obtained from their recurrence relation
 * using dynamic programming.
 * @param nTexture output; the results are returned through this buffer
 * @param knots knot vector
 * @param u parameter value at which the basis functions are evaluated
 * @param degree degree of the basis functions
 * @param d_knotsCnt number of knots
 */
__device__ void d_basisFunction(float *nTexture, const float *knots, float u, int degree, int d_knotsCnt);

/**
 * Device-side check of whether two floats are equal (declaration only).
 * As on the CPU, floats on the GPU carry small errors, so comparing with ==
 * tends to report equal values as different.
 * NOTE(review): the tolerance is defined by the implementation, which is not
 * in this header.
 * @return true if the two values are considered equal
 */
__device__ bool d_floatEqual(float a, float b);

#endif