import os
import sys

project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(project_dir)
os.chdir(project_dir)

import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from pyhocon import ConfigFactory
from scipy.spatial import cKDTree

from utils.logger import logger
from utils.general import get_class
from data_loader import NHREP_Dataset
from loss import LossManager
from learning_rate import LearningRateScheduler
from model.sample import Sampler


class NHREPNet_Training:
    def __init__(self, name_prefix: str, conf, if_baseline: bool = False, if_feature_sample: bool = False):
        self.conf = conf
        self.name_prefix = name_prefix
        self.sampler = Sampler.get_sampler(
            self.conf.get_string('network.sampler.sampler_type'))(
            global_sigma=self.conf.get_float('network.sampler.properties.global_sigma'),
            local_sigma=self.conf.get_float('network.sampler.properties.local_sigma')
        )
        data_dir = self.conf.get_string('train.input_path')
        self.dataset = NHREP_Dataset(data_dir, name_prefix, if_baseline, if_feature_sample)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # model hyperparameters
        self.d_in = 3                    # input dimension: x, y, z
        self.dims_sdf = [256, 256, 256]  # hidden-layer widths
        self.nepochs = 15000             # number of training epochs

        folder = self.conf.get_string('train.folderprefix')
        self.writer = SummaryWriter(os.path.join("summary", folder, name_prefix))  # TensorBoard writer

        # checkpoint directories
        self.init_checkpoints()

    def run_nhrepnet_training(self):
        # data preparation
        logger.info("Preparing data")
        self.data = self.dataset.get_data().to(self.device).requires_grad_()  # x, y, z, nx, ny, nz
        feature_mask_cpu = self.dataset.get_feature_mask().numpy()  # per-point branch labels
        self.feature_mask = torch.from_numpy(feature_mask_cpu).to(self.device)
        self.points_batch = 16384  # batch size
        self.compute_local_sigma()

        n_branch = int(torch.max(self.feature_mask).item())          # number of branches
        n_batchsize = self.points_batch                              # total batch size
        n_patch_batch = n_batchsize // n_branch                      # samples per branch
        n_patch_last = n_batchsize - n_patch_batch * (n_branch - 1)  # samples of the last branch

        # 1.1 collect the point indices belonging to each branch
        patch_id, patch_id_n = self.compute_patch(n_branch, n_patch_batch, n_patch_last, feature_mask_cpu)
        # 1.2 build the branch masks
        branch_mask, single_branch_mask_gt, single_branch_mask_id = self.get_branch_mask(n_branch, n_patch_batch, n_patch_last)
        # 1.3 initialize the model
        csg_tree, flag_convex = self.dataset.get_csg_tree()
        self.model = get_class(self.conf.get_string('train.network_class'))(
            d_in=self.d_in,
            n_branch=n_branch,
            csg_tree=csg_tree,
            flag_convex=flag_convex,
            **self.conf.get_config('network.inputs')
        ).to(self.device)
        self.scheduler = LearningRateScheduler(self.conf.get_list('train.learning_rate_schedule'),
                                               self.conf.get_float('train.weight_decay'),
                                               self.model.parameters())
        self.loss_manager = LossManager(ablation="none")

        logger.info("Starting training")
        self.model.train()  # put the model in training mode
        for epoch in tqdm(range(self.nepochs), desc="training", unit="epoch"):
            try:
                self.train_one_epoch(epoch, patch_id, patch_id_n, n_patch_batch, n_patch_last, n_branch, n_batchsize)
            except Exception as e:
                logger.error(f"Error during training: {str(e)}")
                break
        self.tracing()

    def train_one_epoch(self, epoch, patch_id, patch_id_n, n_patch_batch, n_patch_last, n_branch, n_batchsize):
        # sample point indices for this batch
        indices = self.get_indices(patch_id, patch_id_n, n_patch_batch, n_patch_last, n_branch)
        cur_data = self.data[indices]            # x, y, z, nx, ny, nz
        mnfld_pnts = cur_data[:, :self.d_in]     # on-surface (manifold) points
        mnfld_sigma = self.local_sigma[indices]  # per-point noise scale
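        # Off-surface samples: each surface point is presumably jittered by its local
        # sigma, with additional globally scattered samples controlled by global_sigma
        # (see the network.sampler settings in the conf); these feed the off-surface
        # loss terms below.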
        nonmnfld_pnts = self.sampler.get_points(mnfld_pnts.unsqueeze(0), mnfld_sigma.unsqueeze(0)).squeeze()

        # forward pass
        mnfld_pred_all = self.model(mnfld_pnts)
        nonmnfld_pred_all = self.model(nonmnfld_pnts)
        normals = cur_data[:, -self.d_in:]

        # loss
        loss, loss_details = self.loss_manager.compute_loss(
            mnfld_pnts=mnfld_pnts,
            normals=normals,
            mnfld_pred_all=mnfld_pred_all,
            nonmnfld_pnts=nonmnfld_pnts,
            nonmnfld_pred_all=nonmnfld_pred_all,
            n_batchsize=n_batchsize,
            n_branch=n_branch,
            n_patch_batch=n_patch_batch,
            n_patch_last=n_patch_last,
        )

        # backward pass
        self.scheduler.step(loss, epoch)      # adjust the learning rate
        self.scheduler.optimizer.zero_grad()  # clear gradients
        loss.backward()                       # backpropagate
        self.scheduler.optimizer.step()       # update parameters

        avg_loss = loss.item()
        if epoch % 100 == 0:
            self.writer.add_scalar('Loss/train', avg_loss, epoch)  # log to TensorBoard
            for k, v in loss_details.items():
                self.writer.add_scalar('Loss/' + k, v.item(), epoch)
        if epoch % self.conf.get_int('train.checkpoint_frequency') == 0:
            self.save_checkpoints(epoch)

    # ============================ data preparation ============================
    def compute_patch(self, n_branch, n_patch_batch, n_patch_last, feature_mask_cpu):
        '''Collect, per branch, the indices of the points assigned to it and their counts.'''
        patch_id = []
        patch_id_n = []
        for i in range(n_branch):
            patch_id.append(np.where(feature_mask_cpu == i + 1)[0])
            patch_id_n.append(patch_id[i].shape[0])
        return patch_id, patch_id_n

    def get_branch_mask(self, n_branch, n_patch_batch, n_patch_last):
        '''
        branch_mask: one row per branch, one column per sample; marks which samples
            of a batch belong to each branch.
        single_branch_mask_gt: one row per sample, one column per branch; one-hot
            branch membership of each sample.
        single_branch_mask_id: branch index of each sample.
        '''
        # the masks must cover the whole batch: (n_branch - 1) regular patches plus the last one
        n_batchsize = n_patch_batch * (n_branch - 1) + n_patch_last
        branch_mask = torch.zeros(n_branch, n_batchsize, device=self.device)
        single_branch_mask_gt = torch.zeros(n_batchsize, n_branch, device=self.device)
        single_branch_mask_id = torch.zeros([n_batchsize], dtype=torch.long, device=self.device)
        for i in range(n_branch - 1):
            branch_mask[i, i * n_patch_batch : (i + 1) * n_patch_batch] = 1.0
            single_branch_mask_gt[i * n_patch_batch : (i + 1) * n_patch_batch, i] = 1.0
            single_branch_mask_id[i * n_patch_batch : (i + 1) * n_patch_batch] = i
        # the last branch takes the remaining n_patch_last samples
        branch_mask[n_branch - 1, (n_branch - 1) * n_patch_batch:] = 1.0
        single_branch_mask_gt[(n_branch - 1) * n_patch_batch:, n_branch - 1] = 1.0
        single_branch_mask_id[(n_branch - 1) * n_patch_batch:] = n_branch - 1
        return branch_mask, single_branch_mask_gt, single_branch_mask_id

    def get_indices(self, patch_id, patch_id_n, n_patch_batch, n_patch_last, n_branch):
        indices = torch.empty(0, dtype=torch.int64, device=self.device)
        for i in range(n_branch - 1):
            indices_nonfeature = torch.tensor(
                patch_id[i][np.random.choice(patch_id_n[i], n_patch_batch, True)],
                device=self.device)
            indices = torch.cat((indices, indices_nonfeature), 0)
        # last patch
        indices_nonfeature = torch.tensor(
            patch_id[n_branch - 1][np.random.choice(patch_id_n[n_branch - 1], n_patch_last, True)],
            device=self.device)
        indices = torch.cat((indices, indices_nonfeature), 0)
        return indices

    def compute_local_sigma(self):
        """Compute a per-point sigma from nearest-neighbor distances."""
        try:
            sigma_set = []
            # use only the xyz coordinates, not the normals, for the distance queries
            data_cpu = self.data.detach().cpu().numpy()[:, :self.d_in]
            ptree = cKDTree(data_cpu)
            logger.debug("KD tree constructed")
            for p in np.array_split(data_cpu, 100, axis=0):
                d = ptree.query(p, 50 + 1)
                sigma_set.append(d[0][:, -1])
            sigmas = np.concatenate(sigma_set)
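            # sigmas now holds, for every point, the distance to its 50th nearest
            # neighbor (the query returns the point itself as the 0-th hit), so
            # denser regions get a proportionally smaller sampling radius.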
            self.local_sigma = torch.from_numpy(sigmas).float().to(self.device)
        except Exception as e:
            logger.error(f"Error computing local sigma: {str(e)}")
            raise

    # ============================ saving the model ============================
    def init_checkpoints(self):
        self.checkpoints_path = os.path.join("../exps/single_shape", self.name_prefix, "checkpoints")
        self.ModelParameters_path = os.path.join(self.checkpoints_path, "ModelParameters")
        self.OptimizerParameters_path = os.path.join(self.checkpoints_path, "OptimizerParameters")
        self.TorchScript_path = os.path.join(self.checkpoints_path, "TorchScript")
        # create the directories
        os.makedirs(self.ModelParameters_path, exist_ok=True)
        os.makedirs(self.OptimizerParameters_path, exist_ok=True)
        os.makedirs(self.TorchScript_path, exist_ok=True)

    def save_checkpoints(self, epoch):
        torch.save(
            {"epoch": epoch, "model_state_dict": self.model.state_dict()},
            os.path.join(self.ModelParameters_path, str(epoch) + ".pth"))
        torch.save(
            {"epoch": epoch, "model_state_dict": self.model.state_dict()},
            os.path.join(self.ModelParameters_path, "latest.pth"))
        torch.save(
            {"epoch": epoch, "optimizer_state_dict": self.scheduler.optimizer.state_dict()},
            os.path.join(self.OptimizerParameters_path, str(epoch) + ".pth"))
        torch.save(
            {"epoch": epoch, "optimizer_state_dict": self.scheduler.optimizer.state_dict()},
            os.path.join(self.OptimizerParameters_path, "latest.pth"))

    def tracing(self):
        # build a fresh instance for tracing and copy in the trained weights;
        # without load_state_dict the traced module would keep its random initialization
        csg_tree, flag_convex = self.dataset.get_csg_tree()
        network = get_class(self.conf.get_string('train.network_class'))(
            d_in=self.d_in,
            n_branch=int(torch.max(self.feature_mask).item()),
            csg_tree=csg_tree,
            flag_convex=flag_convex,
            **self.conf.get_config('network.inputs')
        ).to(self.device)
        network.load_state_dict(self.model.state_dict())
        network.eval()

        # trace
        example = torch.rand(224, 3).to(self.device)
        traced_script_module = torch.jit.trace(network, example)
        traced_script_module.save(os.path.join(self.TorchScript_path, "model_h.pt"))


if __name__ == "__main__":
    name_prefix = 'broken_bullet_50k'
    conf = ConfigFactory.parse_file('./conversion/setup.conf')
    try:
        train = NHREPNet_Training(name_prefix, conf, if_baseline=True, if_feature_sample=False)
        train.run_nhrepnet_training()
    except Exception as e:
        logger.error(str(e))
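# A minimal sketch of how the traced module written by tracing() could be loaded
# for inference elsewhere (paths mirror init_checkpoints(); adjust to your setup):
#
#   import torch
#   model = torch.jit.load("../exps/single_shape/broken_bullet_50k/checkpoints/TorchScript/model_h.pt")
#   with torch.no_grad():
#       h = model(torch.rand(1024, 3))  # implicit-function values at 1024 query points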