"""Consistency and timing checks for the FE-convolution (FEconv) operators.

The script evaluates ``K @ U`` / ``U^T K U`` in three ways and prints the
results so they can be compared by eye:

* the FEconv operators imported from the project module ``feconv``;
* an element-wise batched ``matmul`` (``originalMethod_check``);
* an assembled sparse global stiffness matrix (``assembleKU_*``).

Project-local modules (``feconv``, ``periodicU``, ``getTypeH8``, ``mesh3D``,
...) are imported lazily inside the functions that need them, so this module
can be imported without them being installed.
"""
import time

import numpy as np
import torch
from torch import nn
from torch.autograd import Function

# import feconv_cuda


def _default_device():
    """Return the first CUDA device when available, otherwise the CPU."""
    return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _synchronize(device):
    """Wait for queued CUDA work on *device* to finish (no-op on CPU).

    CUDA kernels are launched asynchronously, so a wall-clock timer must be
    read only after a synchronisation, otherwise it measures launch time.
    """
    device = torch.device(device)
    if device.type == "cuda":
        torch.cuda.synchronize(device)


# %% data_pre
def data_pre_dtype(batchsize=1, resolution=40, isFloat64=False):
    """Build ``Ke``, ``Fe`` and ``edofMat`` and optionally cast to float64.

    Args:
        batchsize: Forwarded to :func:`data_pre` (which does not use it).
        resolution: Number of elements per edge of the cubic mesh.
        isFloat64: When true, ``Ke`` and ``Fe`` are cast to ``np.float64``;
            otherwise they keep whatever dtype ``data_pre`` produced.

    Returns:
        Tuple ``(Ke, Fe, edofMat)``.
    """
    Ke, Fe, edofMat = data_pre(batchsize, resolution)
    if isFloat64:
        Ke = Ke.astype(np.float64)
        Fe = Fe.astype(np.float64)
    return Ke, Fe, edofMat


def data_pre(batchsize=1, resolution=40):
    """Compute the element matrices and the periodic element-DOF table.

    Args:
        batchsize: Unused; kept for interface compatibility.
        resolution: Number of elements per edge of the cubic mesh.

    Returns:
        Tuple ``(Ke, Fe, edofMat)`` as produced by the project helpers
        ``LocalKeFe`` and ``edofMatrix``.
    """
    from ConstMatricesForHomogenization import ISOelasticitytensor, LocalKeFe
    from PeriodicMesh3D import PeriodicMesh3D, edofMatrix

    E = 1e6
    nu = 0.3
    D0 = ISOelasticitytensor(E, nu)
    Ke, Fe, _intB = LocalKeFe(resolution, D0)

    # The non-periodic variant lives in ``mesh3D`` (see assembleKU_check).
    _eleidx, MESH, _V = PeriodicMesh3D(resolution)
    edofMat = edofMatrix(MESH)
    print('====================== data prepared for homogenization')
    return Ke, Fe, edofMat


# =============================================================================
# %% FEconv_cuda
def feconvNet_periodicU_check(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz):
    """Run ``FEconvNet_periodicU_H8types`` once and return ``sum(U * KU)``.

    ``U`` itself (not the copy returned by :func:`datapre_feconv`) is fed to
    the operator, so gradients of the returned scalar flow back to the tensor
    the caller passed in.  ``U`` is not moved to the selected device here.

    Args:
        U: Displacement tensor; the source comments give its shape as
            ``[bs, 18, res, res, res]``.
        rho: Density tensor used to derive the element-type map.
        Ke: Element stiffness matrix handed to the operator constructor.
        ibatch, outidx, Idxx, Idxy, Idxz: Index of the single entry of
            ``KU`` and ``U`` that is printed for inspection.

    Returns:
        Scalar tensor: the sum of the per-sample, per-load-case energies.
    """
    # Only the element-type map is needed; the helper still prints its report.
    _, H8types, _, _ = datapre_feconv(U, rho, Ke)
    from feconv import FEconvNet_periodicU_H8types
    print('FECONV imported')
    device = _default_device()
    print("DEVICE : ", device)
    H8types = H8types.to(device)

    steps = 1
    convOP = FEconvNet_periodicU_H8types(Ke).to(device)
    _synchronize(device)
    start = time.perf_counter()
    for _ in range(steps):
        KU = convOP(U, H8types)
        uku = (KU * U).sum((2, 3, 4))
        # Group the channels in threes (x, y, z) -> one value per load case.
        uku1 = uku.view(-1, 6, 3).sum(2)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"{steps} steps, {elapsed/steps} s/step")

    print("OUTPUT info.:---------------------------------")
    print('* KU :', KU.shape, KU.dtype, KU.device)
    print('* U :', U.shape, U.dtype, U.device)
    print('* UKU :', uku1.shape, uku1.dtype, uku1.device)
    print(uku1.shape, "* ", uku1.cpu().detach().numpy())
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',
          KU[ibatch, outidx, Idxx, Idxy, Idxz])
    print(f'U[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',
          U[ibatch, outidx, Idxx, Idxy, Idxz])
    return uku1.sum()


def FEconv_runTwice(U, rho, Ke, device=None, steps=10):
    """Time the preprocessing and repeated runs of the periodic operator.

    Args:
        U: Displacement tensor, passed through ``periodicU`` before use.
        rho: Density tensor, converted to an element-type map by ``typeH8``.
        Ke: Element stiffness matrix handed to the operator constructor.
        device: Target device.  Defaults to CUDA when available, else CPU
            (previously an undefined-unless-run-as-script global).
        steps: Number of timed forward passes.
    """
    from feconv import FEconvNet_periodicU_H8types
    from getTypeH8 import typeH8
    from periodicU import periodicU

    if device is None:
        device = _default_device()
    convOP = FEconvNet_periodicU_H8types(Ke).to(device)

    start = time.perf_counter()
    H8types = typeH8(rho).to(device)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"preprocess rho: {elapsed} s")

    start = time.perf_counter()
    U = periodicU(U).to(device)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"preprocess U: {elapsed} s")

    for i in range(steps):
        start = time.perf_counter()
        KU = convOP(U, H8types)
        _synchronize(device)
        elapsed = time.perf_counter() - start
        print(f"Round {i+1}: {elapsed} s, KU.sum() = {KU.sum()}")


def feconvNet_check(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz):
    """Run ``FEconvNet`` (density-driven variant) once and print the results.

    The operator receives the caller's ``U`` together with ``rho``, the node
    index table and the filters, and returns both ``KU`` and the displacement
    tensor it actually used.

    Args:
        U: Displacement tensor (not moved to the selected device here).
        rho: Density tensor.
        Ke: Element stiffness matrix used to build the filters.
        ibatch, outidx, Idxx, Idxy, Idxz: Index of the printed entry.
    """
    _, _, nodIdx, filters = datapre_feconv(U, rho, Ke)
    print("======================= FEconvNet--------------")
    from feconv import FEconvNet
    print('FECONV imported')
    device = _default_device()
    print("DEVICE : ", device)
    rho = rho.to(device)
    nodIdx = nodIdx.to(device)
    filters = filters.to(device)

    steps = 1
    convOP = FEconvNet().to(device)
    U_in = U
    _synchronize(device)
    start = time.perf_counter()
    for _ in range(steps):
        KU, U = convOP(U_in, rho, nodIdx, filters)
        uku = (KU * U).sum((2, 3, 4))
        uku1 = uku.view(-1, 6, 3).sum(2)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"{steps} steps, {elapsed/steps} s/step")

    print("OUTPUT info.:---------------------------------")
    print('* KU :', KU.shape, KU.dtype, KU.device)
    print('* U :', U.shape, U.dtype, U.device)
    print('* UKU :', uku1.shape, uku1.dtype, uku1.device)
    print(uku1.cpu().detach().numpy())
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',
          KU[ibatch, outidx, Idxx, Idxy, Idxz])
    print(f'U[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',
          U[ibatch, outidx, Idxx, Idxy, Idxz])


def datapre_feconv(U, rho, Ke):
    """Prepare the inputs shared by the FEconv checks and print a summary.

    Args:
        U: Displacement tensor, passed through the project ``periodicU``.
        rho: Density tensor, converted by ``typeH8`` and cast to int32.
        Ke: Element stiffness matrix from which ``getFilters`` builds filters.

    Returns:
        Tuple ``(U, H8types, nodIdx, filters)``; the last two are torch
        tensors created from the NumPy arrays returned by the helpers.
    """
    from periodicU import periodicU
    from getTypeH8 import typeH8
    from arrangeIndex import arrangeIndex
    from symbolicExec_vec2 import getFilters

    U = periodicU(U)
    H8types = typeH8(rho).int()
    nodIdx = torch.from_numpy(arrangeIndex())
    filters = torch.from_numpy(getFilters(Ke))

    print('====================== data prepared for FEconv')
    print('* U:', U.shape, U.dtype)
    print('* H8types:', H8types.shape, H8types.dtype)
    print('* nodIdx:', nodIdx.shape, nodIdx.dtype)
    print('* filters:', filters.shape, filters.dtype)
    return U, H8types, nodIdx, filters


def feconv_check(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz):
    """Run the low-level ``FECONV`` operator once and print the results.

    Unlike the ``feconvNet_*`` checks, every input (including ``U``) is the
    preprocessed version from :func:`datapre_feconv`, moved to the device.

    Args:
        U: Displacement tensor.
        rho: Density tensor.
        Ke: Element stiffness matrix used to build the filters.
        ibatch, outidx, Idxx, Idxy, Idxz: Index of the printed ``KU`` entry.
    """
    U, H8types, nodIdx, filters = datapre_feconv(U, rho, Ke)
    from feconv import FECONV
    print('FECONV imported')
    device = _default_device()
    print("DEVICE : ", device)
    U = U.to(device)
    H8types = H8types.to(device)
    nodIdx = nodIdx.to(device)
    filters = filters.to(device)

    convOP = FECONV().to(device)
    _synchronize(device)
    start = time.perf_counter()
    KU = convOP(U, H8types, nodIdx, filters)
    uku = (KU * U).sum((2, 3, 4))
    uku1 = uku.view(-1, 6, 3).sum(2)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed} s")

    print("OUTPUT info.:---------------------------------")
    print('* KU :', KU.shape, KU.dtype, KU.device)
    print('* U :', U.shape, U.dtype, U.device)
    print('* UKU :', uku1.shape, uku1.dtype, uku1.device)
    print(uku1.cpu().numpy())
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',
          KU[ibatch, outidx, Idxx, Idxy, Idxz])


# %% element-wise
def originalMethod_check(output_img, input, Ke, edofMat):
    """Evaluate ``sum_e rho_e * u_e^T Ke u_e`` with batched matmuls.

    Args:
        output_img: Displacement tensor accepted by :func:`Utensor2vec`.
        input: Density tensor ``[bs, 1, nx, ny, nz]`` (the name shadows the
            builtin but is kept for interface compatibility).
        Ke: ``24 x 24`` element stiffness tensor.
        edofMat: Integer tensor ``[nele, 24]`` of global DOF indices.

    Returns:
        Scalar tensor: the energy summed over batch, elements and load cases.
        The per-load-case sums are printed as a NumPy array.
    """
    # Reverse the spatial axes so the flattened order matches Utensor2vec.
    # (Original author's open question: "ordering of the 3D rho?")
    input = input.permute((0, 1, 4, 3, 2))
    size = input.shape[0]
    pp = input.contiguous().view(size, -1, 1, 1)
    K = pp * Ke  # [bs, nele, 24, 24]

    ref_map = Utensor2vec(output_img)
    U = ref_map[:, edofMat, :]  # [bs, nele, 24, n_cases]
    UT = U.permute([0, 1, 3, 2])
    UKU = torch.matmul(torch.matmul(UT, K), U)  # [bs, nele, n_cases, n_cases]

    # Only the diagonal (each load case against itself) contributes.
    per_case = torch.diagonal(UKU, dim1=-2, dim2=-1).sum(dim=(0, 1))
    print(np.array(per_case.tolist()))
    return per_case.sum()


def oricheck(U, rho, Ke, edofMat):
    """Move the inputs to the device, report them and time the matmul check.

    Args:
        U: Displacement tensor (left on whatever device it already is).
        rho: Density tensor.
        Ke: Element stiffness matrix as a NumPy array.
        edofMat: Element-DOF table as a NumPy array.

    Returns:
        The scalar returned by :func:`originalMethod_check`.
    """
    device = _default_device()
    print("DEVICE : ", device)
    edofMat = torch.from_numpy(edofMat).to(device).long()
    Ke = torch.from_numpy(Ke).to(device)
    rho = rho.to(device)

    def as_np(t):
        return t.cpu().detach().numpy()

    print("INPUT info.:----------------------------------")
    print('* U :', tuple(U.shape), U.dtype, as_np(U.sum()))
    print('* rho :', tuple(rho.shape), rho.dtype,
          as_np(rho.sum()), as_np(rho.min()), as_np(rho.max()))
    print('* edofMat :', tuple(edofMat.shape), edofMat.dtype,
          as_np(edofMat.sum()), as_np(edofMat.min()), as_np(edofMat.max()))
    print('* Ke :', tuple(Ke.shape), Ke.dtype, as_np(abs(Ke).sum()))

    _synchronize(device)
    start = time.perf_counter()
    losst1 = originalMethod_check(U, rho, Ke, edofMat)
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed} s")
    return losst1


# %% assemble
def _print_energies(K, ref_map):
    """Print ``u_i^T K u_i`` for the six load cases of batch sample 0."""
    uku = np.zeros(6)
    for i in range(6):
        Uvec = ref_map[0, :, i].cpu().detach().numpy()
        uku[i] = Uvec @ (K @ Uvec)
    print(uku)


def assembleKU_periodic(rho, U, Ke, Fe, edofMat, ibatch, outidx,
                        Idxx, Idxy, Idxz):
    """Print the six energies from an assembled periodic global matrix.

    Uses the caller's ``edofMat`` and ``3 * nele`` DOFs.  Only batch sample 0
    is evaluated; the index arguments are accepted but not used.
    """
    print('=============assembleKU-------Periodic B.C.---------------')
    x = np.transpose(rho[0, 0].cpu().numpy(), (2, 1, 0))
    resolution = x.shape[0]
    nele = resolution**3
    ndof = nele * 3
    K, _F = AssembleGlobalKF(x, Ke, Fe, edofMat, ndof, nele)
    print(K.dtype, K.shape)
    _print_energies(K, Utensor2vec(U))


def assembleKU_check(rho, U, Ke, Fe, edofMat, ibatch, outidx,
                     Idxx, Idxy, Idxz):
    """Print energies and one ``K @ U`` entry from a non-periodic assembly.

    ``U`` is passed through ``periodicU`` and the element-DOF table is rebuilt
    from ``mesh3D`` (the ``edofMat`` argument is ignored), giving
    ``3 * (resolution + 1)**3`` DOFs.  Only batch sample 0 is evaluated.
    """
    from periodicU import periodicU
    from mesh3D import mesh3D, edofMatrix

    U = periodicU(U)
    x = np.transpose(rho[0, 0].cpu().numpy(), (2, 1, 0))
    resolution = x.shape[0]
    nele = resolution**3
    ndof = 3 * (resolution + 1)**3
    _eleidx, MESH, _V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K, _F = AssembleGlobalKF(x, Ke, Fe, edofMat, ndof, nele)
    print(K.dtype, K.shape)

    ref_map = Utensor2vec(U)
    _print_energies(K, ref_map)

    # Channels come in groups of three per load case.
    imap = outidx // 3
    Uvec = ref_map[0, :, [imap]].cpu().detach().numpy()
    KU = K @ Uvec
    print(f"imap = {imap},KU.shape = {KU.shape}")
    nodes_per_edge = resolution + 1
    inodidx = Idxz * nodes_per_edge**2 + Idxy * nodes_per_edge + Idxx
    idofidx = inodidx * 3 + outidx % 3
    print(f"inodidx = {inodidx}, idofidx = {idofidx}")
    print('*** assembleKU_i = ', KU[idofidx])
    print('*** U_i = ', Uvec[idofidx])


def AssembleGlobalK(x, Ke, edofMat, ndof, nele):
    """Assemble the sparse global stiffness matrix.

    Args:
        x: Element densities; flattened in C order to one value per element.
        Ke: ``24 x 24`` element stiffness matrix.
        edofMat: ``[nele, 24]`` table of global DOF indices.
        ndof: Size of the (square) global matrix.
        nele: Number of elements (unused; kept for interface compatibility).

    Returns:
        ``scipy.sparse`` CSC matrix of shape ``(ndof, ndof)``; duplicate
        entries are summed by the COO -> CSC conversion.
    """
    from scipy import sparse

    penal = 1
    Emin = 0
    rho = np.maximum(x.flatten(), Emin)
    # np.kron yields floats; sparse indices are integers, so cast explicitly.
    iK = np.kron(edofMat, np.ones((24, 1))).flatten().astype(np.int64)
    jK = np.kron(edofMat, np.ones((1, 24))).flatten().astype(np.int64)
    sK = ((Ke.flatten()[np.newaxis]).T * (rho**penal)).flatten(order='F')
    return sparse.coo_matrix((sK, (iK, jK)), shape=(ndof, ndof)).tocsc()


def AssembleGlobalKF(x, Ke, Fe, edofMat, ndof, nele):
    """Assemble the global stiffness matrix and the six load vectors.

    Args:
        x: Element densities; flattened in C order to one value per element.
        Ke: ``24 x 24`` element stiffness matrix.
        Fe: ``24 x 6`` element load matrix.
        edofMat: ``[nele, 24]`` table of global DOF indices.
        ndof: Number of global DOFs.
        nele: Number of elements (rows of ``edofMat``).

    Returns:
        Tuple ``(K, F)`` of CSC matrices with shapes ``(ndof, ndof)`` and
        ``(ndof, 6)``.
    """
    from scipy import sparse

    K = AssembleGlobalK(x, Ke, edofMat, ndof, nele)

    penal = 1
    Emin = 0
    rho = np.maximum(x.flatten(), Emin)
    iF = np.kron(edofMat, np.ones((6, 1))).flatten().astype(np.int64)
    case_ids = np.repeat(np.arange(6)[np.newaxis], nele, axis=0)
    jF = np.kron(case_ids, np.ones((1, 24))).flatten().astype(np.int64)
    sF = ((Fe.flatten(order='F')[np.newaxis]).T
          * (rho**penal)).flatten(order='F')
    F = sparse.coo_matrix((sF, (iF, jF)), shape=(ndof, 6)).tocsc()
    return K, F


# %% others
def KUperEleComputation():
    """Print, symbolically, every element contribution to global DOF 39.

    Works on a ``2 x 2 x 2`` mesh from the project ``mesh3D`` module.
    """
    from mesh3D import mesh3D, edofMatrix

    resolution = 2
    _eleidx, MESH, _V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    idofidx = 39
    for edof in edofMat:
        for j in range(24):
            if edof[j] != idofidx:
                continue
            for k in range(24):
                print(f"Ke[{j},{k}]*U[{edof[k]}] + ", end=' ')
            print(' ')


def Utensor2vec(U):
    """Flatten a displacement field into per-load-case DOF vectors.

    Args:
        U: Either a 5-D tensor ``[bs, 3*k, nx, ny, nz]`` (``k`` load cases,
            three displacement components each; the original handled
            ``k = 6`` and ``k = 1``) or a 4-D tensor ``[3, nx, ny, nz]``.

    Returns:
        Tensor ``[bs, 3*N, k]`` (``bs = 1, k = 1`` for 4-D input) whose entry
        ``[b, 3*n + d, c]`` is component ``d`` of load case ``c`` at node
        ``n``.  For 5-D input the spatial axes are reversed before
        flattening.

    Raises:
        ValueError: For any other rank or channel count (previously an
            ``UnboundLocalError``).
    """
    ndim = len(U.shape)
    if ndim == 5:
        size, channels = U.shape[0], U.shape[1]
        if channels % 3 != 0:
            raise ValueError(
                f"channel count must be a multiple of 3, got {channels}")
        n_cases = channels // 3
        # [bs, case, component, node] -> [bs, node, component, case]
        grouped = U.permute((0, 1, 4, 3, 2)).reshape(size, n_cases, 3, -1)
        return grouped.permute((0, 3, 2, 1)).reshape(size, -1, n_cases)
    if ndim == 4:
        if U.shape[0] != 3:
            raise ValueError(
                f"4-D input must have 3 channels, got {U.shape[0]}")
        # NOTE(review): unlike the 5-D branch the spatial axes are NOT
        # reversed here - kept as in the original; confirm this is intended.
        ref3 = U.reshape(1, 3, -1)
        return ref3.permute((0, 2, 1)).reshape(1, -1, 1)
    raise ValueError(f"expected a 4-D or 5-D tensor, got {ndim} dimensions")


def fixeddofs(resolution):
    """Return the DOF indices (3 per node) of the eight corner nodes.

    NOTE(review): the node index uses a stride of ``resolution`` while the
    non-periodic meshes elsewhere in this file have ``resolution + 1`` nodes
    per edge; behaviour is kept as in the original - confirm before use.
    """
    ends = (0, resolution)
    nodes = [i * resolution**2 + j * resolution + k
             for i in ends for j in ends for k in ends]
    return [node * 3 + d for node in nodes for d in range(3)]


def _report_filter_blocks(Ke, filters, theta, totals=True):
    """Print dtypes, optional totals and the nine 8x8 sub-block sums."""
    print(Ke.dtype, filters[255, ].dtype, theta.dtype)
    if totals:
        print(Ke.sum(), filters[255, ].sum(), theta.sum())
        print(Ke[21, 0], filters[255, 0, 0, 0], theta[0, 0, 0])
    for ix in range(3):
        for iy in range(3):
            idxx = list(np.arange(ix, 24, 3))
            idxy = list(np.arange(iy, 24, 3))
            block = Ke[idxx, :][:, idxy]
            print(f"ix={ix},iy={iy}: ", block.shape, block.sum(),
                  filters[255, ix, iy].sum(), theta[ix, iy].sum())


def filtercheck(Ke):
    """Compare ``Ke`` with the filters for element type 255 and ``theta``.

    The same report is printed three times: on the raw NumPy filters, after
    conversion to a torch tensor, and after casting everything to float32
    (dtypes and block sums only), to expose precision differences.
    """
    from symbolicExec_vec2 import getFilters
    from symbolicExec_vec2 import symbolicExec

    filters = getFilters(Ke)
    theta = symbolicExec(Ke, 1, 1, 1, 1, 1, 1, 1, 1)
    print("---Ke----filters----theta---")
    _report_filter_blocks(Ke, filters, theta)

    filters = torch.from_numpy(filters)
    _report_filter_blocks(Ke, filters, theta)

    filters = filters.float()
    theta = theta.astype(np.float32)
    Ke = Ke.astype(np.float32)
    _report_filter_blocks(Ke, filters, theta, totals=False)


# %% FEconv_Pycheck
def FEconv_Pycheck_varU(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz):
    """Perturb single entries of ``U`` and compare assembly with FEconv.

    For each of the first three channels and each node of the first
    ``2 x 2 x 2`` corner, one entry of batch sample 0 is set to 2 on a fresh
    copy of ``U`` (the original aliased ``U`` so perturbations accumulated),
    then the assembled ``K @ U`` entry and the pure-Python convolution are
    printed.

    NOTE(review): unlike :func:`FEconv_PyCheck` this function neither
    transposes the density nor uses the ``z``-major node index - kept as in
    the original; confirm which convention is current.
    """
    from mesh3D import mesh3D, edofMatrix

    x = rho[0, 0].cpu().numpy()
    resolution = x.shape[0]
    nele = resolution**3
    ndof = 3 * (resolution + 1)**3
    _eleidx, MESH, _V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K = AssembleGlobalK(x, Ke, edofMat, ndof, nele)
    inodidx = Idxx * (resolution + 1)**2 + Idxy * (resolution + 1) + Idxz
    idofidx = inodidx * 3 + outidx % 3
    print(f"outidx,Idxx,Idxy,Idxz,inodidx,idofidx = {outidx,Idxx,Idxy,Idxz,inodidx,idofidx}")

    U, H8types, nodIdx, filters = datapre_feconv(U, rho, Ke)
    for i in range(3):
        for ix in range(2):
            for iy in range(2):
                for iz in range(2):
                    U2 = U.detach().clone()
                    U2[0, i, ix, iy, iz] = 2
                    print(f"(i,ix,iy,iz) = {i,ix,iy,iz}")
                    ref_map = Utensor2vec(U2)
                    Uvec = ref_map[ibatch, :, outidx // 3].cpu().numpy()
                    KU = K @ Uvec
                    print(f', assembleKU_[{idofidx}] = ', KU[idofidx], end='')
                    FEconv_PyCheck_st(U2, H8types, filters, nodIdx,
                                      ibatch, outidx, Idxx, Idxy, Idxz)


def FEconv_PyCheck(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz):
    """Compare one ``K @ U`` entry: pure-Python convolution vs. assembly.

    Args:
        U: Displacement tensor.
        rho: Density tensor; batch sample 0 drives the assembled matrix.
        Ke: Element stiffness matrix.
        ibatch, outidx, Idxx, Idxy, Idxz: Entry to evaluate.
    """
    from mesh3D import mesh3D, edofMatrix

    U, H8types, nodIdx, filters = datapre_feconv(U, rho, Ke)
    FEconv_PyCheck_st(U, H8types, filters, nodIdx,
                      ibatch, outidx, Idxx, Idxy, Idxz)

    x = np.transpose(rho[0, 0].cpu().numpy(), (2, 1, 0))
    resolution = x.shape[0]
    nele = resolution**3
    ndof = 3 * (resolution + 1)**3
    _eleidx, MESH, _V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K = AssembleGlobalK(x, Ke, edofMat, ndof, nele)

    ref_map = Utensor2vec(U)
    # detach/cpu so this also works for grad-tracking or GPU tensors.
    Uvec = ref_map[ibatch, :, outidx // 3].detach().cpu().numpy()
    KU = K @ Uvec
    inodidx = Idxz * (resolution + 1)**2 + Idxy * (resolution + 1) + Idxx
    idofidx = inodidx * 3 + outidx % 3
    print(f"outidx,Idxx,Idxy,Idxz,inodidx,idofidx = {outidx,Idxx,Idxy,Idxz,inodidx,idofidx}")
    print(f'*** assembleKU_[{idofidx}] = ', KU[idofidx])


def FEconv_PyCheck_st(U, H8types, filters, nodIdx, ibatch, outidx,
                      Idxx, Idxy, Idxz):
    """Evaluate one output entry of the FE convolution in pure Python.

    Args:
        U: Displacement tensor indexed ``[batch, channel, i, j, k]``.
        H8types: Element-type map indexed ``[batch, 0, i, j, k]``.
        filters: Filter bank indexed ``[type, direction, component, j]``.
        nodIdx: Neighbour table indexed ``[i, j, k, neighbour, axis]``;
            a neighbour with any index equal to ``-1`` is skipped.
        ibatch, outidx, Idxx, Idxy, Idxz: Entry to evaluate.

    Returns:
        0-dim tensor with the accumulated value (zero when every neighbour
        is skipped; the original crashed in that case).
    """
    print('-----------------------------------------FEconv_PyCheck_st')
    h8type = H8types[ibatch, 0, Idxx, Idxy, Idxz]
    direction = outidx % 3
    print(f"h8type = {h8type}, direction = {direction}")

    first_channel = outidx - direction  # first channel of this load case
    weights = filters[int(h8type)][direction]
    neighbours = nodIdx[Idxx][Idxy][Idxz]
    convresult = 0
    for j in range(27):
        uidx1, uidx2, uidx3 = (int(neighbours[j][a]) for a in range(3))
        if (uidx1 + 1) * (uidx2 + 1) * (uidx3 + 1) == 0:
            continue
        for ix in range(3):
            convresult = convresult + (
                U[ibatch][first_channel + ix][uidx1][uidx2][uidx3]
                * weights[ix][j])

    if not torch.is_tensor(convresult):
        convresult = U.new_zeros(())
    print('convresult = ', convresult.detach().cpu().numpy())
    return convresult


# %% main
def main():
    """Benchmark the periodic FEconv operator and its gradient."""
    print('modify mark 0')
    device = _default_device()
    print("DEVICE : ", device)
    ibatch = 0
    outidx = 1
    Idxx = 31
    Idxy = 11
    Idxz = 21

    # %%% rho,U generate
    batchsize = 64
    resolution = 40
    randU = True
    randrho = True
    isFloat64 = True
    torchtype = torch.float64 if isFloat64 else torch.float32
    print(f'**** batchsize = {batchsize}, resolution = {resolution} ****')

    U_shape = (batchsize, 18, resolution, resolution, resolution)
    rho_shape = (batchsize, 1, resolution, resolution, resolution)
    make_U = torch.rand if randU else torch.ones
    make_rho = torch.rand if randrho else torch.ones
    U = make_U(U_shape, dtype=torchtype, device=device)
    rho = make_rho(rho_shape, dtype=torchtype)  # stays on the CPU

    from periodicU import periodicU
    U = periodicU(U)
    U.requires_grad_()
    # Binarise the density field.
    rho[rho <= 0.5] = 0
    rho[rho > 0.5] = 1
    print(f"U is random: **{randU}**, rho is random: **{randrho}**")
    print(f'rho is discret: **{torch.abs((rho-1)*rho).max().item() == 0}**')
    print(f"U.requires_grad={U.requires_grad}, U.is_leaf = {U.is_leaf}")

    # %%% Hom. Param.s
    Ke, Fe, edofMat = data_pre_dtype(
        batchsize=1, resolution=resolution, isFloat64=isFloat64)

    # %%% FEconv_test
    print("======================= FEconvNet ===============================")
    FEconv_runTwice(U, rho, Ke, device=device)
    L = feconvNet_periodicU_check(U, rho, Ke, ibatch, outidx,
                                  Idxx, Idxy, Idxz)

    print("------------------------ Gradients ------------------------------")
    _synchronize(device)
    start = time.perf_counter()
    print("L = ", L)
    print(f"U.requires_grad={U.requires_grad}, U.is_leaf = {U.is_leaf}")
    L.backward()
    gradU = U.grad
    print(f"gradU.shape = {gradU.shape}")
    print(f"*** gradU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = {gradU[ibatch,outidx,Idxx,Idxy,Idxz]}")
    print(f"*** gradU.sum() = {gradU.sum()}")
    _synchronize(device)
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed}s")

    FEconv_runTwice(U, rho, Ke, device=device)

    # Optional cross-checks, disabled by default:
    #   L = oricheck(U, rho, Ke, edofMat); L.backward()
    #   assembleKU_check(rho, U, Ke, Fe, edofMat,
    #                    ibatch, outidx, Idxx, Idxy, Idxz)
    #   assembleKU_periodic(rho, U, Ke, Fe, edofMat,
    #                       ibatch, outidx, Idxx, Idxy, Idxz)
    #   FEconv_PyCheck(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz)
    #   feconv_check(U, rho, Ke, ibatch, outidx, Idxx, Idxy, Idxz)


if __name__ == "__main__":
    main()