from torch import nn
from torch.autograd import Function
import torch
import numpy as np
import time
#import feconv_cuda
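# Check/benchmark script for the FEconv operators used in 3D periodic
# homogenization: the convolution-based K*U products from the feconv CUDA
# extension are compared against an element-wise matmul reference
# (originalMethod_check), a globally assembled scipy sparse stiffness matrix
# (AssembleGlobalK/KF), and a pure-Python re-implementation (FEconv_PyCheck*),
# for random or uniform density fields rho and displacement fields U.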
#%% data_pre
def data_pre_dtype(batchsize = 1,resolution = 40,isFloat64=False):
    Ke,Fe,edofMat = data_pre(batchsize,resolution)
    if isFloat64:
        Ke = Ke.astype(np.float64)
        Fe = Fe.astype(np.float64)
    return Ke,Fe,edofMat
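# data_pre builds the constant element-level quantities for homogenization.
# Expected contents (inferred from how they are used below): Ke is the 24x24
# stiffness matrix of one trilinear H8 element, Fe the 24x6 element load
# matrix for the six unit test strains, and edofMat maps every element to its
# 24 global DOF indices (one row per element).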
def data_pre(batchsize = 1,resolution = 40):
    E = 1e6; nu = 0.3
    from ConstMatricesForHomogenization import ISOelasticitytensor,LocalKeFe
    D0 = ISOelasticitytensor(E, nu)
    Ke,Fe,intB = LocalKeFe(resolution,D0)
    # h = 1.0/resolution
    # nele = resolution**3
    # I = np.eye(6)
    # datashape = resolution
    from PeriodicMesh3D import PeriodicMesh3D,edofMatrix
    eleidx,MESH,V = PeriodicMesh3D(resolution)
    # from mesh3D import mesh3D,edofMatrix
    # eleidx,MESH,V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    print('====================== data prepared for homogenization')
    # Ke = np.ones(Ke.shape,dtype = Ke.dtype)
    # Ke = np.eye(24,dtype = Ke.dtype)
    # Ke[0,1] = 1
    return Ke,Fe,edofMat
#=============================================================================
#%% FEconv_cuda
def feconvNet_periodicU_check(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz):
    Up,H8types,nodIdx,filters = datapre_feconv(U,rho,Ke)
    from feconv import FEconvNet_periodicU_H8types
    print('FECONV imported')
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print("DEVICE : ", device)
    # U = U.to(device)
    H8types = H8types.to(device)
    rho = rho.to(device)
    nodIdx = nodIdx.to(device)
    filters = filters.to(device)
    # print("INPUT info.:----------------------------------")
    # print('* U :',U.cpu().numpy().shape,U.dtype,U.sum().cpu().numpy())
    # print('* H8types :',H8types.cpu().numpy().shape,H8types.dtype,H8types.sum().cpu().numpy(),H8types.min().cpu().numpy(),H8types.max().cpu().numpy())
    # print('* nodIdx :',nodIdx.cpu().numpy().shape,nodIdx.dtype,nodIdx.sum().cpu().numpy(),nodIdx.min().cpu().numpy(),nodIdx.max().cpu().numpy())
    # print('* filters :',filters.cpu().numpy().shape,filters.dtype,abs(filters).sum().cpu().numpy())
    steps = 1
    convOP = FEconvNet_periodicU_H8types(Ke).to(device)
    start = time.perf_counter()
    for i in range(steps):
        # KU = convOP(U,rho,nodIdx,filters)
        KU = convOP(U,H8types)
        # KU = FEconvFunction.apply(U,H8types,nodIdx,filters)
        uku = (KU*U).sum((2,3,4))
        uku1 = uku.view(-1,6,3).sum((2))
    elapsed = time.perf_counter() - start
    print(f"{steps} steps, {elapsed/steps} s/step")
    print("OUTPUT info.:---------------------------------")
    print('* KU :',KU.shape,KU.dtype,KU.device)
    print('* U :',U.shape,U.dtype,U.device)
    #uku2 = uku.view(-1,3,6).sum((1))
    print('* UKU :',uku1.shape,uku1.dtype,uku1.device)
    #print(uku1.cpu().numpy()[0,0])
    #sum0 = uku[0,:3].sum()
    #print(uku1.shape, uku1[0,0]-sum0)
    print(uku1.shape,"* ",uku1.cpu().detach().numpy())
    # ibatch = 0; outidx = 1; Idxx = 1; Idxy = 2; Idxz =3;
    # FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
    # print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
    print(f'U[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',U[ibatch,outidx,Idxx,Idxy,Idxz])
    # print("******************** Gradients")
    # L = uku1.sum()
    # print("L = ",L)
    # L.backward()
    # gradU = U_40.grad()
    # print(f"gradU.shape = {gradU.shape}")
    return uku1.sum()
def FEconv_runTwice(U,rho,Ke):
    from feconv import FEconvNet_periodicU_H8types
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    convOP = FEconvNet_periodicU_H8types(Ke).to(device)
    from getTypeH8 import typeH8
    from periodicU import periodicU
    start = time.perf_counter()
    H8types = typeH8(rho).to(device)
    elapsed = time.perf_counter() - start
    print(f"preprocess rho: {elapsed} s")
    start = time.perf_counter()
    U = periodicU(U).to(device)
    elapsed = time.perf_counter() - start
    print(f"preprocess U: {elapsed} s")
    steps = 10
    for i in range(steps):
        start = time.perf_counter()
        KU = convOP(U,H8types)
        elapsed = time.perf_counter() - start
        print(f"Round {i+1}: {elapsed} s, KU.sum() = {KU.sum()}")
    # start = time.perf_counter()
    # KU = convOP(U,H8types)
    # elapsed = time.perf_counter() - start
    # print(f"2nd round: {elapsed} s")
    # print(KU.sum())
    # return KU
def feconvNet_check(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz):
    U_bak,H8types,nodIdx,filters = datapre_feconv(U,rho,Ke)
    print("======================= FEconvNet--------------")
    from feconv import FEconvNet
    print('FECONV imported')
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print("DEVICE : ", device)
    # U = U.to(device)
    # H8types = H8types.to(device)
    rho = rho.to(device)
    nodIdx = nodIdx.to(device)
    filters = filters.to(device)
    # print("INPUT info.:----------------------------------")
    # print('* U :',U.cpu().numpy().shape,U.dtype,U.sum().cpu().numpy())
    # print('* H8types :',H8types.cpu().numpy().shape,H8types.dtype,H8types.sum().cpu().numpy(),H8types.min().cpu().numpy(),H8types.max().cpu().numpy())
    # print('* nodIdx :',nodIdx.cpu().numpy().shape,nodIdx.dtype,nodIdx.sum().cpu().numpy(),nodIdx.min().cpu().numpy(),nodIdx.max().cpu().numpy())
    # print('* filters :',filters.cpu().numpy().shape,filters.dtype,abs(filters).sum().cpu().numpy())
    steps = 1
    convOP = FEconvNet().to(device)
    U_40 = U
    start = time.perf_counter()
    for i in range(steps):
        KU,U = convOP(U_40,rho,nodIdx,filters)
        # KU = FEconvFunction.apply(U,H8types,nodIdx,filters)
        uku = (KU*U).sum((2,3,4))
        uku1 = uku.view(-1,6,3).sum((2))
    elapsed = time.perf_counter() - start
    print(f"{steps} steps, {elapsed/steps} s/step")
    print("OUTPUT info.:---------------------------------")
    print('* KU :',KU.shape,KU.dtype,KU.device)
    print('* U :',U.shape,U.dtype,U.device)
    #uku2 = uku.view(-1,3,6).sum((1))
    print('* UKU :',uku1.shape,uku1.dtype,uku1.device)
    #print(uku1.cpu().numpy()[0,0])
    #sum0 = uku[0,:3].sum()
    #print(uku1.shape, uku1[0,0]-sum0)
    print(uku1.cpu().detach().numpy())
    # ibatch = 0; outidx = 1; Idxx = 1; Idxy = 2; Idxz =3;
    # FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
    # print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
    print(f'U[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',U[ibatch,outidx,Idxx,Idxy,Idxz])
    # print("******************** Gradients")
    # L = uku1.sum()
    # print("L = ",L)
    # L.backward()
    # gradU = U_40.grad()
    # print(f"gradU.shape = {gradU.shape}")
def datapre_feconv(U,rho,Ke):
    from periodicU import periodicU
    U = periodicU(U)
    from getTypeH8 import typeH8
    H8types = typeH8(rho)
    H8types = H8types.int()
    from arrangeIndex import arrangeIndex
    nodIdx = arrangeIndex()
    # nodIdx = nodIdx.astype(np.int32)
    from symbolicExec_vec2 import getFilters
    filters = getFilters(Ke)
    # filters = filters.astype(np.float32)
    nodIdx = torch.from_numpy(nodIdx)
    filters = torch.from_numpy(filters)
    print('====================== data prepared for FEconv')
    print('* U:', U.shape, U.dtype)
    print('* H8types:',H8types.shape, H8types.dtype)
    print('* nodIdx:', nodIdx.shape, nodIdx.dtype)
    print('* filters:',filters.shape, filters.dtype)
    return U,H8types,nodIdx,filters
def feconv_check(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz):
    U,H8types,nodIdx,filters = datapre_feconv(U,rho,Ke)
    from feconv import FECONV
    from feconv import FEconvFunction
    print('FECONV imported')
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print("DEVICE : ", device)
    U = U.to(device)
    H8types = H8types.to(device)
    nodIdx = nodIdx.to(device)
    filters = filters.to(device)
    # print("INPUT info.:----------------------------------")
    # print('* U :',U.cpu().numpy().shape,U.dtype,U.sum().cpu().numpy())
    # print('* H8types :',H8types.cpu().numpy().shape,H8types.dtype,H8types.sum().cpu().numpy(),H8types.min().cpu().numpy(),H8types.max().cpu().numpy())
    # print('* nodIdx :',nodIdx.cpu().numpy().shape,nodIdx.dtype,nodIdx.sum().cpu().numpy(),nodIdx.min().cpu().numpy(),nodIdx.max().cpu().numpy())
    # print('* filters :',filters.cpu().numpy().shape,filters.dtype,abs(filters).sum().cpu().numpy())
    steps = 10
    convOP = FECONV().to(device)
    start = time.perf_counter()
    # for i in range(steps):
    KU = convOP(U,H8types,nodIdx,filters)
    # KU = FEconvFunction.apply(U,H8types,nodIdx,filters)
    uku = (KU*U).sum((2,3,4))
    uku1 = uku.view(-1,6,3).sum((2))
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed} s")
    print("OUTPUT info.:---------------------------------")
    print('* KU :',KU.shape,KU.dtype,KU.device)
    print('* U :',U.shape,U.dtype,U.device)
    #uku2 = uku.view(-1,3,6).sum((1))
    print('* UKU :',uku1.shape,uku1.dtype,uku1.device)
    #print(uku1.cpu().numpy()[0,0])
    #sum0 = uku[0,:3].sum()
    #print(uku1.shape, uku1[0,0]-sum0)
    print(uku1.cpu().numpy())
    # ibatch = 0; outidx = 1; Idxx = 1; Idxy = 2; Idxz =3;
    # FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
    # print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
    print(f'KU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = ',KU[ibatch,outidx,Idxx,Idxy,Idxz])
#%% element-wise
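# Element-wise reference implementation: for every element e and every load
# case i the energy u_{e,i}^T (rho_e * Ke) u_{e,i} is evaluated with dense
# batched matmuls and summed, i.e. losst1 = sum_i sum_e u^T K u. The FEconv
# results above are checked against this value.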
def originalMethod_check(output_img,input,Ke, edofMat):
    input=input.permute((0,1,4,3,2))
    size = input.shape[0]
    # ordering of the 3D rho tensor?
    pp = input.contiguous().view(size, -1, 1, 1)#.to(device)
    K = pp * Ke # [bs, 8000, 24, 24]
    # F = pp * Fe # [bs, 8000, 24, 6]
    #output_img = output_img.permute((0,1,4,3,2))
    ref_map = Utensor2vec(output_img)
    U = ref_map[:, edofMat, :]#[bs,40^3,24,6]
    UT = U.permute([0, 1, 3, 2])
    # losst1 = torch.matmul(torch.matmul(UT, K), U).sum()
    # print(losst1)
    # FU = (U * F).sum()
    # losst1 =
    UKU = torch.matmul(torch.matmul(UT, K), U)
    UKU0 = UKU[:,:,0,0].sum()
    UKU1 = UKU[:,:,1,1].sum()
    UKU2 = UKU[:,:,2,2].sum()
    UKU3 = UKU[:,:,3,3].sum()
    UKU4 = UKU[:,:,4,4].sum()
    UKU5 = UKU[:,:,5,5].sum()
    losst1 = UKU0+UKU1+UKU2+UKU3+UKU4+UKU5
    print(np.array([UKU0.item(), UKU1.item(), UKU2.item(),
                    UKU3.item(), UKU4.item(), UKU5.item()]))
    return losst1
def oricheck(U,rho,Ke,edofMat):
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print("DEVICE : ", device)
    edofMat = torch.from_numpy(edofMat).to(device).long()
    Ke = torch.from_numpy(Ke).to(device)
    rho = rho.to(device)
    # U = U.to(device)
    print("INPUT info.:----------------------------------")
    print('* U :',U.cpu().detach().numpy().shape,U.dtype,U.sum().cpu().detach().numpy())
    print('* rho :',rho.cpu().numpy().shape,rho.dtype,rho.sum().cpu().numpy(),rho.min().cpu().numpy(),rho.max().cpu().numpy())
    print('* edofMat :',edofMat.cpu().numpy().shape,edofMat.dtype,edofMat.sum().cpu().numpy(),edofMat.min().cpu().numpy(),edofMat.max().cpu().numpy())
    print('* Ke :',Ke.cpu().numpy().shape,Ke.dtype,abs(Ke).sum().cpu().numpy())
    # resolution = U.shape[2]
    # U = torch.rand((1,18,resolution,resolution,resolution),dtype = torch.float64,device=device,requires_grad=True)
    # U = U.requires_grad_()
    start = time.perf_counter()
    losst1 = originalMethod_check(U,rho,Ke, edofMat)
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed} s")
    return losst1
#%% assemble
def assembleKU_periodic(rho,U,Ke,Fe,edofMat,ibatch,outidx,Idxx,Idxy,Idxz):
    print('=============assembleKU-------Periodic B.C.---------------')
    x = rho[0,0].cpu().numpy()
    x = np.transpose(x,(2,1,0))
    resolution = x.shape[0]
    nele = resolution**3; ndof = nele*3
    K,F = AssembleGlobalKF(x,Ke,Fe,edofMat,ndof,nele)
    print(K.dtype,K.shape)
    ref_map = Utensor2vec(U)
    uku = np.zeros((6))
    for i in range(6):
        Uvec = ref_map[0,:,i].cpu().detach().numpy()
        uku[i] = Uvec @ (K@Uvec)
    print(uku)
def assembleKU_check(rho,U,Ke,Fe,edofMat,ibatch,outidx,Idxx,Idxy,Idxz):
    from periodicU import periodicU
    U = periodicU(U)
    x = rho[0,0].cpu().numpy()
    # resolution = x.shape[0]
    # nele = resolution**3; ndof = nele#3*(resolution+1)**3
    x = np.transpose(x,(2,1,0))
    resolution = x.shape[0]
    nele = resolution**3; ndof = 3*(resolution+1)**3
    from mesh3D import mesh3D,edofMatrix
    eleidx,MESH,V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K,F = AssembleGlobalKF(x,Ke,Fe,edofMat,ndof,nele)
    print(K.dtype,K.shape)
    ref_map = Utensor2vec(U)
    uku = np.zeros((6))
    for i in range(6):
        Uvec = ref_map[0,:,i].cpu().detach().numpy()
        uku[i] = Uvec @ (K@Uvec)
    print(uku)
    imap = outidx//3
    Uvec = ref_map[0,:,[imap]].cpu().detach().numpy()
    KU = K@Uvec
    print(f"imap = {imap},KU.shape = {KU.shape}")
    resolution = resolution + 1
    inodidx = Idxz*resolution**2 + Idxy*resolution + Idxx
    idofidx = inodidx*3 + outidx % 3
    print(f"inodidx = {inodidx}, idofidx = {idofidx}")
    print('*** assembleKU_i = ',KU[idofidx])
    print('*** U_i = ',Uvec[idofidx])
    # Uvec = np.ones((K.shape[0],1))
    # print(Uvec[3:].T @ (K[3:,:][:,3:]@Uvec[3:]))
    # Uvec[:3] = 0
    # print(Uvec.T @ (K@Uvec))
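# Global assembly with scipy sparse matrices, following the usual topology
# optimization pattern: the triplets (iK, jK, sK) repeat every element's 24x24
# block Ke scaled by rho_e**penal at the element's global DOFs from edofMat;
# coo_matrix sums duplicate entries, giving the ndof x ndof stiffness matrix.
# AssembleGlobalKF additionally assembles the ndof x 6 load matrix F from Fe.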
def AssembleGlobalK(x,Ke,edofMat,ndof,nele):
    from scipy import sparse
    penal = 1
    rho = x.flatten()#[:nele]
    Emin = 0#1e-9;
    rho = np.maximum(rho,Emin)
    iK = np.kron(edofMat,np.ones((24,1))).flatten()
    jK = np.kron(edofMat,np.ones((1,24))).flatten()
    sK = ((Ke.flatten()[np.newaxis]).T*(rho**penal)).flatten(order='F')
    K = sparse.coo_matrix((sK, (iK, jK)), shape=(ndof, ndof)).tocsc()
    return K
def AssembleGlobalKF(x,Ke,Fe,edofMat,ndof,nele):
    from scipy import sparse
    penal = 1
    rho = x.flatten()#[:nele]
    Emin = 0#1e-9;
    rho = np.maximum(rho,Emin)
    iK = np.kron(edofMat,np.ones((24,1))).flatten()
    jK = np.kron(edofMat,np.ones((1,24))).flatten()
    sK = ((Ke.flatten()[np.newaxis]).T*(rho**penal)).flatten(order='F')
    K = sparse.coo_matrix((sK, (iK, jK)), shape=(ndof, ndof)).tocsc()
    iF = np.kron(edofMat,np.ones((6,1))).flatten()
    jF = np.kron(np.repeat(np.array(range(6))[np.newaxis],nele,axis = 0),np.ones((1,24))).flatten()
    sF = ((Fe.flatten(order='F')[np.newaxis]).T*(rho**penal)).flatten(order='F')
    F = sparse.coo_matrix((sF, (iF, jF)), shape=(ndof, 6)).tocsc()
    return K,F
#%% others
def KUperEleComputation():
    resolution = 2
    from mesh3D import mesh3D,edofMatrix
    eleidx,MESH,V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    idofidx = 39
    nele = edofMat.shape[0]
    for iele in range(nele):
        edof = edofMat[iele,:]
        for j in range(24):
            if edof[j]==idofidx:
                for k in range(24):
                    print(f"Ke[{j},{k}]*U[{edof[k]}] + ",end = ' ')
                    # Ke[j,:]*U[edof]
                print(' ')
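# Utensor2vec reorders a displacement field tensor into nodal vectors that can
# be multiplied with the assembled matrices: an input of shape
# [bs,18,res,res,res] (6 load cases x 3 components) becomes [bs, 3*res**3, 6],
# while a 3-channel input becomes a single column of shape [bs, 3*res**3, 1].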
def Utensor2vec(U):
    if len(U.shape)==5:#[bs,18,resolution,resolution,resolution]
        size = U.shape[0]
        U = U.permute((0,1,4,3,2))
        if U.shape[1]==18:
            ref18 = U.contiguous().view(size,18,-1) #[bs,18,40**3]
            permuteList = (0,2,1)
            map0 = ref18[:,0:3].permute(permuteList).contiguous().view(size,-1,1)
            map1 = ref18[:,3:6].permute(permuteList).contiguous().view(size,-1,1)
            map2 = ref18[:,6:9].permute(permuteList).contiguous().view(size,-1,1)
            map3 = ref18[:,9:12].permute(permuteList).contiguous().view(size,-1,1)
            map4 = ref18[:,12:15].permute(permuteList).contiguous().view(size,-1,1)
            map5 = ref18[:,15:18].permute(permuteList).contiguous().view(size,-1,1)
            ref_map = torch.cat([map0,map1,map2,map3,map4,map5], 2)# [bs,3*40**3,6]
        if U.shape[1]==3:#[bs,3,resolution,resolution,resolution]
            ref3 = U.contiguous().view(size,3,-1) #[bs,3,40**3]
            ref_map = ref3.permute((0,2,1)).contiguous().view(size,-1,1)# [bs,3*40**3,1]
    if len(U.shape)==4:#[3,resolution,resolution,resolution]
        size = 1
        ref3 = U.contiguous().view(size,3,-1) #[1,3,40**3]
        ref_map = ref3.permute((0,2,1)).contiguous().view(size,-1,1)# [1,3*40**3,1]
    return ref_map
def fixeddofs(resolution):
    node = []
    for i in [0,resolution]:
        for j in [0,resolution]:
            for k in [0,resolution]:
                nodeidx = i*resolution**2 + j*resolution + k
                node.append(nodeidx)
    fixed = []
    for i in node:
        for j in range(3):
            fixed.append(i*3+j)
    return fixed
def filtercheck(Ke):
    from symbolicExec_vec2 import getFilters
    filters = getFilters(Ke)
    from symbolicExec_vec2 import symbolicExec
    theta = symbolicExec(Ke,1,1,1,1,1,1,1,1)
    print("---Ke----filters----theta---")
    print(Ke.dtype,filters[255,].dtype,theta.dtype)
    print(Ke.sum(),filters[255,].sum(),theta.sum())
    print(Ke[21,0],filters[255,0,0,0],theta[0,0,0])
    for ix in range(3):
        for iy in range(3):
            idxx = list(np.arange(ix,24,3))
            idxy = list(np.arange(iy,24,3))
            print(f"ix={ix},iy={iy}: ",Ke[idxx,:][:,idxy].shape,
                  Ke[idxx,:][:,idxy].sum(),filters[255,ix,iy].sum(),theta[ix,iy].sum())
    filters = torch.from_numpy(filters)
    print(Ke.dtype,filters[255,].dtype,theta.dtype)
    print(Ke.sum(),filters[255,].sum(),theta.sum())
    print(Ke[21,0],filters[255,0,0,0],theta[0,0,0])
    for ix in range(3):
        for iy in range(3):
            idxx = list(np.arange(ix,24,3))
            idxy = list(np.arange(iy,24,3))
            print(f"ix={ix},iy={iy}: ",Ke[idxx,:][:,idxy].shape,
                  Ke[idxx,:][:,idxy].sum(),filters[255,ix,iy].sum(),theta[ix,iy].sum())
    filters = filters.float()
    theta = theta.astype(np.float32)
    Ke = Ke.astype(np.float32)
    print(Ke.dtype,filters[255,].dtype,theta.dtype)
    for ix in range(3):
        for iy in range(3):
            idxx = list(np.arange(ix,24,3))
            idxy = list(np.arange(iy,24,3))
            print(f"ix={ix},iy={iy}: ",Ke[idxx,:][:,idxy].shape,
                  Ke[idxx,:][:,idxy].sum(),filters[255,ix,iy].sum(),theta[ix,iy].sum())
#%% FEconv_Pycheck
def FEconv_Pycheck_varU(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz):
    x = rho[0,0].cpu().numpy()
    resolution = x.shape[0]
    nele = resolution**3; ndof = 3*(resolution+1)**3
    from mesh3D import mesh3D,edofMatrix
    eleidx,MESH,V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K = AssembleGlobalK(x,Ke,edofMat,ndof,nele)
    inodidx = Idxx*(resolution+1)**2 + Idxy*(resolution+1) + Idxz
    idofidx = inodidx*3 + outidx % 3
    print(f"outidx,Idxx,Idxy,Idxz,inodidx,idofidx = {outidx,Idxx,Idxy,Idxz,inodidx,idofidx}")
    U,H8types,nodIdx,filters = datapre_feconv(U,rho,Ke)
    for i in range(3):
        for ix in range(2):
            for iy in range(2):
                for iz in range(2):
                    U2 = U.clone()  # clone so the perturbation does not accumulate in U
                    U2[0,i,ix,iy,iz]=2
                    print(f"(i,ix,iy,iz) = {i,ix,iy,iz}")
                    ref_map = Utensor2vec(U2)
                    Uvec = ref_map[ibatch,:,outidx // 3]
                    Uvec = Uvec.numpy()
                    KU = K@Uvec
                    print(f', assembleKU_[{idofidx}] = ',KU[idofidx],end='')
                    convresult = FEconv_PyCheck_st(U2,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
def FEconv_PyCheck(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz):
    U,H8types,nodIdx,filters = datapre_feconv(U,rho,Ke)
    '''
    UKU = 0
    for outidx in range(3):
        for Idxx in range(40):
            for Idxy in range(40):
                for Idxz in range(40):
                    convresult = FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
                    UKU += convresult
    print('ConvUKU = ',UKU)
    '''
    convresult = FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
    # for i in range(8):
    #     h8type = 2**i
    #     convresult = FEconv_PyCheck_st2(U,h8type,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz)
    x = rho[0,0].cpu().numpy()
    x = np.transpose(x,(2,1,0))
    resolution = x.shape[0]
    nele = resolution**3; ndof = 3*(resolution+1)**3
    from mesh3D import mesh3D,edofMatrix
    eleidx,MESH,V = mesh3D(resolution)
    edofMat = edofMatrix(MESH)
    K = AssembleGlobalK(x,Ke,edofMat,ndof,nele)
    # print('global K: ',K.dtype,K.shape)
    ref_map = Utensor2vec(U)
    Uvec = ref_map[ibatch,:,outidx // 3]
    Uvec = Uvec.numpy()
    # Uvec = np.ones((K.shape[0]),dtype = K.dtype)
    KU = K@Uvec
    # inodidx = Idxx*(resolution+1)**2 + Idxy*(resolution+1) + Idxz
    inodidx = Idxz*(resolution+1)**2 + Idxy*(resolution+1) + Idxx
    idofidx = inodidx*3 + outidx % 3
    print(f"outidx,Idxx,Idxy,Idxz,inodidx,idofidx = {outidx,Idxx,Idxy,Idxz,inodidx,idofidx}")
    print(f'*** assembleKU_[{idofidx}] = ',KU[idofidx])
    # print(KU.T)
    # ndofs = K.shape[0]
    # dofs=np.arange(ndofs)
    # fixed = fixeddofs(resolution)
    # fixed = np.array(fixed)
    # free=np.setdiff1d(dofs,fixed)
    '''
    for i in range(ndofs):
        if K[idofidx,i] != 0:
            print(f" Uvec[{i}] * K[{idofidx},{i}] = {Uvec[i]} * {K[idofidx,i]}")
    '''
    # print(f"UKU = {np.ones((1,K.shape[0]),dtype = K.dtype)[:, free]@KU[free]}")
def FEconv_PyCheck_st(U,H8types,filters,nodIdx,ibatch,outidx,Idxx,Idxy,Idxz):
    print('-----------------------------------------FEconv_PyCheck_st')
    convresult = 0
    h8type = H8types[ibatch,0,Idxx,Idxy,Idxz]
    direction = outidx % 3
    print(f"h8type = {h8type}, direction = {direction}")
    for j in range(27):
        # uidx1 = nodIdx[Idxz][Idxy][Idxx][j][0]
        # uidx2 = nodIdx[Idxz][Idxy][Idxx][j][1]
        # uidx3 = nodIdx[Idxz][Idxy][Idxx][j][2]
        uidx1 = nodIdx[Idxx][Idxy][Idxz][j][0]
        uidx2 = nodIdx[Idxx][Idxy][Idxz][j][1]
        uidx3 = nodIdx[Idxx][Idxy][Idxz][j][2]
        if ((uidx1+1)*(uidx2+1)*(uidx3+1)!=0):
            # print(f' j={j}, uidx1={uidx1}, uidx2={uidx2}, uidx3={uidx3}')
            for ix in range(3):
                # print(f'ix={ix}, j={j}, uidx1={uidx1}, uidx2={uidx2}, uidx3={uidx3}')
                # convresult += U[ibatch][outidx - direction + ix][uidx1][uidx2][uidx3] * filters[h8type][ix][direction][j]
                convresult += U[ibatch][outidx - direction + ix][uidx1][uidx2][uidx3] * filters[h8type][direction][ix][j]
                # ix=0
                # print(f"U[{outidx - direction + ix}][{uidx1}][{uidx2}][{uidx3}] * filters[{direction}][{ix}][{j}] = {U[ibatch][outidx - direction + ix][uidx1][uidx2][uidx3]} * {filters[h8type][direction][ix][j]}")
    print('convresult = ',convresult.numpy())
    # tmp = filters[10,:,2,:]
    # print(tmp.sum(),tmp[:,[1,2,4,5,7,8,10,11,13,14,16,17]].sum())
    return convresult
#%% main
if __name__ == "__main__":
    print('modify mark 0')
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print("DEVICE : ", device)
    ibatch = 0; outidx = 1; Idxx = 31; Idxy = 11; Idxz = 21
    #%%% rho,U generate
    # ========================================================================
    batchsize = 64; resolution = 40
    randU=True; randrho=True; isFloat64=True
    if isFloat64:
        torchtype = torch.float64
    else:
        torchtype = torch.float32
    print(f'**** batchsize = {batchsize}, resolution = {resolution} ****')
    if randU:
        U = torch.rand((batchsize,18,resolution,resolution,resolution),dtype = torchtype,device=device)
    else:
        U = torch.ones((batchsize,18,resolution,resolution,resolution),dtype = torchtype,device=device)
    if randrho:
        rho = torch.rand((batchsize,1,resolution,resolution,resolution),dtype = torchtype)
    else:
        rho = torch.ones((batchsize,1,resolution,resolution,resolution),dtype = torchtype)
    from periodicU import periodicU
    U = periodicU(U)
    U.requires_grad_()
    rho[rho<=0.5] = 0
    rho[rho >0.5] = 1
    print(f"U is random: **{randU}**, rho is random: **{randrho}**")
    print(f'rho is discrete: **{torch.abs((rho-1)*rho).max().item() == 0}**' )
    print(f"U.requires_grad={U.requires_grad}, U.is_leaf = {U.is_leaf}")
    #%%% Hom. Param.s
    # ========================================================================
    Ke,Fe,edofMat = data_pre_dtype(batchsize = 1,resolution = resolution, isFloat64=True)
    # import h5py
    # matFile = "G:\FangCloudV2\个人文件\WorkFiles\MMC_DNN\morphology\Kematlab.mat"
    # matData = h5py.File(matFile,'r')
    # Ke = np.transpose( matData['Ke'][()])
    # Ke = np.random.rand(24,24)
    # for i in range(24):
    #     for j in range(i,24):
    #         Ke[i,j] = Ke[j,i]
    # print('* Ke :',Ke.shape,Ke.dtype,abs(Ke).sum())
    # filtercheck(Ke.astype(np.float64))
    #%%% FEconv_test
    print("======================= FEconvNet ===============================")
    FEconv_runTwice(U,rho,Ke)
    # FEconv_Pycheck_varU(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz)
    # feconv_check(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz)
    L = feconvNet_periodicU_check(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz)
    # FEconv_PyCheck(U,rho,Ke,ibatch,outidx,Idxx,Idxy,Idxz)
    # ix = outidx % 3
    # idxx = list(np.arange(ix,24,3))
    # idxy = list(np.arange(24))
    # # idxy = list(np.arange(iy,24,3))
    # print(Ke[idxx,:][:,idxy].sum(),Ke[idxy,:][:,idxx].sum())
    print("------------------------ Gradients ------------------------------")
    start = time.perf_counter()
    print("L = ",L)
    print(f"U.requires_grad={U.requires_grad}, U.is_leaf = {U.is_leaf}")
    L.backward()
    gradU = U.grad
    print(f"gradU.shape = {gradU.shape}")
    # ibatch = 0; outidx = 1; Idxx = 1; Idxy = 11; Idxz = 21;
    print(f"*** gradU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = {gradU[ibatch,outidx,Idxx,Idxy,Idxz]}")
    print(f"*** gradU.sum() = {gradU.sum()}")
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed}s")
    # FEconv_PyCheck(U,rho,Ke)
    FEconv_runTwice(U,rho,Ke)
    '''
    #%%% original_test
    print('==================== original method ============================')
    L = oricheck(U,rho,Ke,edofMat)
    print("------------------------ Gradients ------------------------------")
    start = time.perf_counter()
    print("L = ",L)
    print(f"U.requires_grad={U.requires_grad}, U.is_leaf = {U.is_leaf}")
    L.backward()
    gradU = U.grad
    print(f"gradU.shape = {gradU.shape}")
    # ibatch = 0; outidx = 1; Idxx = 1; Idxy = 11; Idxz = 21;
    print(f"*** gradU[{ibatch},{outidx},{Idxx},{Idxy},{Idxz}] = {gradU[ibatch,outidx,Idxx,Idxy,Idxz]}")
    print(f"*** gradU.sum() = {gradU.sum()}")
    elapsed = time.perf_counter() - start
    print(f"elapsed in {elapsed}s")
    #%%% assemble_test
    print('===================== assembleKU ================================')
    assembleKU_check(rho,U,Ke,Fe,edofMat,ibatch,outidx,Idxx,Idxy,Idxz)
    assembleKU_periodic(rho,U,Ke,Fe,edofMat,ibatch,outidx,Idxx,Idxy,Idxz)
    '''