# coding=utf-8
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from models.codec_us import *
from models.codec3d import *
# my loss
from models.darcy import *
from torch.utils.data import DataLoader, TensorDataset
from utils.misc import mkdirs, to_numpy
from utils.plot import plot_prediction_det, save_stats
from utils.practices import OneCycleScheduler, adjust_learning_rate, find_lr
import time
import argparse
import random
from pprint import pprint
import json
import matplotlib.pyplot as plt
import h5py
from mat_data import new_load_data
from torch.optim.lr_scheduler import ReduceLROnPlateau
plt.switch_backend('agg')
class Parser(argparse.ArgumentParser):
    def __init__(self):
        super(Parser, self).__init__(description='Learning surrogate with mixed residual norm loss')
        # self.add_argument('--exp-name', type=str, default='codec/mixed_residual', help='experiment name')
        self.add_argument('--exp-dir', type=str, default="./experiments", help='directory to save experiments')
        # codec
        self.add_argument('--blocks', type=int, nargs='+', default=[8, 12, 8], help='number of layers in each dense block')
        self.add_argument('--growth-rate', type=int, default=16, help='number of output feature maps of each conv layer within each dense block')
        self.add_argument('--init-features', type=int, default=32, help='number of initial features after the first conv layer')
        self.add_argument('--drop-rate', type=float, default=0., help='dropout rate')
        self.add_argument('--upsample', type=str, default='nearest', choices=['nearest', 'bilinear'])
        # data
        self.add_argument('--data-dir', type=str, default="./datasets", help='directory of the dataset')
        self.add_argument('--data', type=str, default='TPMSp', choices=['grf_kle512', 'channelized'])
        self.add_argument('--ntrain', type=int, default=32, help="number of training data")
        self.add_argument('--ntest', type=int, default=512, help="number of validation data")
        self.add_argument('--imsize', type=int, default=64)
        # training
        self.add_argument('--run', type=int, default=1, help='run instance')
        self.add_argument('--epochs', type=int, default=2000, help='number of epochs to train')
        self.add_argument('--lr', type=float, default=1e-2, help='learning rate')
        self.add_argument('--lr-div', type=float, default=2., help='lr div factor to get the initial lr')
        self.add_argument('--lr-pct', type=float, default=0.3, help='percentage of steps to reach the maximum lr, which is args.lr')
        self.add_argument('--weight-decay', type=float, default=0., help="weight decay")
        self.add_argument('--weight-bound', type=float, default=10, help="weight for boundary loss")
        self.add_argument('--batch-size', type=int, default=32, help='input batch size for training')
        self.add_argument('--test-batch-size', type=int, default=64, help='input batch size for testing')
        self.add_argument('--seed', type=int, default=1, help='manual seed used in Tensor')
        self.add_argument('--cuda', type=int, default=0, choices=[0, 1, 2, 3], help='cuda index')
        # logging
        self.add_argument('--debug', action='store_true', default=True, help='debug or verbose')
        self.add_argument('--ckpt-epoch', type=int, default=None, help='which epoch of checkpoints to be loaded')
        self.add_argument('--ckpt-freq', type=int, default=100, help='how many epochs to wait before saving model')
        self.add_argument('--log-freq', type=int, default=10, help='how many epochs to wait before logging training status')
        self.add_argument('--plot-freq', type=int, default=50, help='how many epochs to wait before plotting test output')
        self.add_argument('--plot-fn', type=str, default='imshow', choices=['contourf', 'imshow'], help='plotting method')
    def parse(self):
        args = self.parse_args()
        tid = 1000
        # hparams = f'Phy_{tid}_ntrain{args.ntrain}_run{args.run}_bs{args.batch_size}_lr{args.lr}_epochs{args.epochs}'
        hparams = f'PhysDriven_epochs{args.epochs}'
        # if args.debug:
        #     hparams = 'debug/' + hparams
        args.run_dir = args.exp_dir + '/' + hparams
        args.ckpt_dir = args.run_dir + '/checkpoints'
        mkdirs(args.run_dir, args.ckpt_dir)
        # assert args.ntrain % args.batch_size == 0 and \
        #     args.ntest % args.test_batch_size == 0
        if args.seed is None:
            args.seed = random.randint(1, 10000)
        print("Random Seed: ", args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        print('Arguments:')
        pprint(vars(args))
        with open(args.run_dir + "/args.txt", 'w') as args_file:
            json.dump(vars(args), args_file, indent=4)
        return args
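
# A typical invocation (script name assumed; all flags are defined above):
#   python train.py --epochs 2000 --lr 1e-2 --cuda 0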
def torch_savemodel(model, epoch, optimizer, loss, path):
    # torch.save(model, "{}/model_epoch{}.pth".format(model_dir, e))
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, path)
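
# A minimal loader mirroring torch_savemodel, as a sketch for resuming runs
# (`torch_loadmodel` is a hypothetical helper, not called below):
def torch_loadmodel(model, optimizer, path, device):
    # restores the exact dict layout written by torch_savemodel above
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['loss']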
def process_rhodata(filename, data_shape, device):
    # load density fields (one row per sample) and reshape to 5D volumes
    x_3d = np.loadtxt(filename, delimiter=',', dtype=np.float32)
    data_size = x_3d.shape[0]
    x_3d = x_3d.reshape((data_size, 1, data_shape, data_shape, data_shape))
    # input_data = torch.from_numpy(x_3d[1:-1]).to(device)
    input_data = torch.from_numpy(x_3d).to(device)
    return input_data
def process_refdata(filename, data_shape, device):
    # reference displacements: 6 load cases per sample, xyz interleaved per node
    data = np.loadtxt(filename, delimiter=',', dtype=np.float32)
    data_size = int(data.shape[0] / 6)
    tmp = data.reshape(data_size, 6, -1)
    output1 = torch.from_numpy(tmp).to(device).permute((0, 2, 1))  # unused
    x_ = tmp[:, :, ::3]
    y_ = tmp[:, :, 1::3]
    z_ = tmp[:, :, 2::3]
    # 18 channels = 3 displacement components x 6 load cases
    img18 = np.concatenate([x_, y_, z_], 1)
    img18_3d = img18.reshape(data_size, 18, data_shape, data_shape, data_shape)
    # ref = torch.from_numpy(img18_3d[1:-1]).to(device)
    ref = torch.from_numpy(img18_3d).to(device)
    return ref
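
# compute_DH assembles the homogenized elasticity tensor of each sample:
#     DH = sum_e rho_e * D0 @ (h^3 * I - intB @ u_e)
# where u_e (24 x 6) holds the element displacements for the six unit load
# cases, gathered via edofMat. It relies on the module-level nele, edofMat,
# D0, I, intB and h defined further below.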
def compute_DH(input_data, ref):
    # input_data: densities on CPU; ref: displacement fields (18 channels)
    size = input_data.shape[0]
    ref18 = ref.contiguous().view(size, 18, -1)
    # channel slice k::6 collects the (x, y, z) components of load case k
    map0 = ref18[:, 0::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    map1 = ref18[:, 1::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    map2 = ref18[:, 2::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    map3 = ref18[:, 3::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    map4 = ref18[:, 4::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    map5 = ref18[:, 5::6].permute((0, 2, 1)).contiguous().view(size, -1, 1)
    ref_map = torch.cat([map0, map1, map2, map3, map4, map5], 2)
    ref_U = ref_map[:, edofMat, :]
    DHs_ref = []
    for idx in range(size):
        DH = np.zeros((6, 6))
        x_vec = input_data[idx].view(-1).numpy()
        for iele in range(nele):
            uie = ref_U[idx, iele].detach().numpy()
            De = D0 * x_vec[iele]
            DH = DH + De @ (I * h**3 - intB @ uie)
        DHs_ref.append(DH)
    DHs_ref_np = np.array(DHs_ref, dtype=np.float32)
    return DHs_ref_np
def load_dataset(dtype, U_flag, DH_flag, val_flag):
    ddir = "alldata/"
    dh_idx = [0, 1, 2, 7, 8, 14, 21, 28, 35]
    if dtype in ("frame128", "tpms2", "frame2_train", "frame2_val", "RSV",
                 "lhs_p", "lhs_pp", "lhs_small", "pcg_lhs", "rsv_small",
                 "new_frame", "lhs", "randgeo", "randgrey"):
        load_data = np.load(ddir + dtype + ".npz")
        rho = torch.from_numpy(load_data['x0'])
        ret = [rho]
        if U_flag:
            displacement = torch.from_numpy(load_data['y0'])
            ret.append(displacement)
        if DH_flag:
            load_data_DH = np.load(ddir + dtype + "_DH.npy")
            if DH_flag == 2:
                # keep the full flattened 6x6 tensor
                load_data_DH = np.array(load_data_DH, dtype=np.float32)
            else:
                # keep only the 9 independent entries
                load_data_DH = np.array(load_data_DH.reshape(-1, 36)[:, dh_idx], dtype=np.float32)
            DH = torch.from_numpy(load_data_DH)
            ret.append(DH)
        return ret
    if dtype == "tpms":
        load_data = np.load(ddir + "TPMS.npz")
        rho = np.concatenate((load_data['data_g'], load_data['data_p'], load_data['data_iwp']), 0)
        rho = torch.from_numpy(rho)
        if DH_flag:
            load_data_DH = np.load(ddir + "TPMS_DH.npy")
            load_data_DH = np.array(load_data_DH.reshape(-1, 36)[:, dh_idx], dtype=np.float32)
            DH = torch.from_numpy(load_data_DH)
            return rho, DH
        if U_flag:
            displacement = np.concatenate((load_data['label_g'], load_data['label_p'], load_data['label_iwp']), 0)
            displacement = torch.from_numpy(displacement)
            return rho, displacement
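
# Example (matching the commented call further below):
#   rho, DH = load_dataset('frame2_train', 0, 1, 0)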
torch.backends.cudnn.benchmark = True
args = Parser().parse()
device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available() else "cpu")
args.train_dir = args.run_dir + '/training'
args.pred_dir = args.train_dir + '/predictions'
mkdirs(args.train_dir, args.pred_dir)
# model = convNN3d_toC(1, 4).to(device)
# model = convNN3d(1, 18, 9).to(device)
model = convNN3d(1, 18, 8).to(device)
# model = convNN3d_d2(1, 18, 6).to(device)
# model = DenseED3d(in_channels=1, out_channels=18,
#                   blocks=[4, 6, 4],
#                   growth_rate=4,
#                   init_features=8).to(device)
# model = DenseED3d(in_channels=1, out_channels=18,
#                   blocks=[4, 6, 6, 6, 4],
#                   growth_rate=6,
#                   init_features=8).to(device)
# u
# ckpt_file = "/home/tmn07/code/py36/pde-surrogate/experiments/codec/mixed_residual/now_compare/401/model_epoch6000.pth"
# uku
# ckpt_file = "/home/tmn07/code/py36/pde-surrogate/experiments/codec/mixed_residual/now_compare/400/model_epoch6000.pth"
# ku_f
# ckpt_file = "/home/tmn07/code/py36/pde-surrogate/experiments/codec/mixed_residual/now_compare/3018/model_epoch6000.pth"
# ckpt_file = "/home/tmn07/code/py36/pde-surrogate/experiments/codec/mixed_residual/now_compare/3021/model_epoch6500.pth"
# checkpoint = torch.load(ckpt_file)
# train_flag: 1 = resume from a checkpoint (requires the ckpt_file / torch.load
# lines above to be uncommented so that `checkpoint` exists), 0 = start fresh
train_flag = 0
# train_flag = 1
if args.debug:
    print(model)
# data_shape = 20
data_shape = 40
I = np.eye(6)
# base material elasticity tensor in Voigt notation (isotropic, E = 1, nu = 0.3)
D0 = np.array([[1.3462, 0.5769, 0.5769, 0,      0,      0],
               [0.5769, 1.3462, 0.5769, 0,      0,      0],
               [0.5769, 0.5769, 1.3462, 0,      0,      0],
               [0,      0,      0,      0.3846, 0,      0],
               [0,      0,      0,      0,      0.3846, 0],
               [0,      0,      0,      0,      0,      0.3846]])
if data_shape == 20:
    nele = 8000
    # intB: integrated strain-displacement (B) matrix of one cubic element, 6 x 24
    intB = np.array([[-0.000625,0,0,0.000625, 0,0,-0.000625,0,0,0.000625, 0,0,-0.000625,0,0,0.000625, 0,0,-0.000625,0,0,0.000625, 0,0],
                     [0,-0.000625,0,0,-0.000625,0,0,0.000625, 0,0,0.000625, 0,0,-0.000625,0,0,-0.000625,0,0,0.000625, 0,0,0.000625, 0],
                     [0,0,-0.000625,0,0,-0.000625,0,0,-0.000625,0,0,-0.000625,0,0,0.000625, 0,0,0.000625, 0,0,0.000625, 0,0,0.000625],
                     [-0.000625,-0.000625,0,-0.000625,0.000625, 0,0.000625, -0.000625,0,0.000625, 0.000625, 0,-0.000625,-0.000625,0,-0.000625,0.000625, 0,0.000625, -0.000625,0,0.000625, 0.000625, 0],
                     [0,-0.000625,-0.000625,0,-0.000625,-0.000625,0,-0.000625,0.000625, 0,-0.000625,0.000625, 0,0.000625, -0.000625,0,0.000625, -0.000625,0,0.000625, 0.000625, 0,0.000625, 0.000625],
                     [-0.000625,0,-0.000625,-0.000625,0,0.000625, -0.000625,0,-0.000625,-0.000625,0,0.000625, 0.000625, 0,-0.000625,0.000625, 0,0.000625, 0.000625, 0,-0.000625,0.000625, 0,0.000625]])
    h = 0.05
else:
    nele = 64000
    intB = np.array([[-0.000156250,0,0,0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0],
                     [0,-0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0,0.000156250,0,0,-0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0,0.000156250,0],
                     [0,0,-0.000156250,0,0,-0.000156250,0,0,-0.000156250,0,0,-0.000156250,0,0,0.000156250,0,0,0.000156250,0,0,0.000156250,0,0,0.000156250],
                     [-0.000156250,-0.000156250,0,-0.000156250,0.000156250,0,0.000156250,-0.000156250,0,0.000156250,0.000156250,0,-0.000156250,-0.000156250,0,-0.000156250,0.000156250,0,0.000156250,-0.000156250,0,0.000156250,0.000156250,0],
                     [0,-0.000156250,-0.000156250,0,-0.000156250,-0.000156250,0,-0.000156250,0.000156250,0,-0.000156250,0.000156250,0,0.000156250,-0.000156250,0,0.000156250,-0.000156250,0,0.000156250,0.000156250,0,0.000156250,0.000156250],
                     [-0.000156250,0,-0.000156250,-0.000156250,0,0.000156250,-0.000156250,0,-0.000156250,-0.000156250,0,0.000156250,0.000156250,0,-0.000156250,0.000156250,0,0.000156250,0.000156250,0,-0.000156250,0.000156250,0,0.000156250]])
    h = 0.025
# element stiffness and load tables; the 20^3-mesh values are rescaled for
# the 40^3 mesh:
# Ke = np.loadtxt("3D/k.txt", delimiter=',', dtype=np.float32)
# Fe = np.loadtxt("3D/f.txt", delimiter=',', dtype=np.float32)
Ke = np.loadtxt("3D/k.txt", delimiter=',', dtype=np.float32) / 2
Fe = np.loadtxt("3D/f.txt", delimiter=',', dtype=np.float32) / 4
Ke = torch.from_numpy(Ke).to(device)
Fe = torch.from_numpy(Fe).to(device)
# node ids of each element; build the 24 DOF indices (3 DOFs per node, 8 nodes)
# mesh = np.loadtxt("3D/20mesh.txt", delimiter='\t', dtype=np.float32)
mesh = np.loadtxt("3D/40mesh.txt", delimiter='\t', dtype=np.float32)
tmp1 = 3 * mesh - 2
tmp2 = 3 * mesh - 1
tmp3 = 3 * mesh
edofMat = np.zeros([mesh.shape[0], 24], dtype=np.int64)  # np.int was removed in NumPy >= 1.24
edofMat[:, ::3] = tmp1
edofMat[:, 1::3] = tmp2
edofMat[:, 2::3] = tmp3
edofMat -= 1  # 1-based node numbering -> 0-based DOF indices
Imask = torch.eye(6)
Imask[:3, :3] = 1
bound = 0.013462  # DH entries below this magnitude are skipped in relative error
dh_idx = [0, 1, 2, 7, 8, 14, 21, 28, 35]
D00 = torch.from_numpy(D0.astype(np.float32)).to(device)
intB2 = torch.from_numpy(intB.astype(np.float32)).to(device)
Ih = I.astype(np.float32) * h**3
dh_idx12 = [0, 1, 2, 6, 7, 8, 12, 13, 14, 21, 28, 35]
dh9_12 = [0, 1, 2, 1, 3, 4, 2, 4, 5, 6, 7, 8]
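# Index maps between a row-major flattened 6x6 DH and its unique entries:
#   dh_idx   picks the 9 independent values C11, C12, C13, C22, C23, C33,
#            C44, C55, C66 of an orthotropic tensor
#   dh_idx12 picks the full upper-left 3x3 block (both triangles) plus the
#            three shear diagonals, 12 entries in total
#   dh9_12   expands the 9 stored values to those 12 positions by symmetry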
## load data
# x, y = load_dataset('frame2_train', 0, 1, 0)
x, y = new_load_data()
y2 = y[:, dh9_12].numpy()
data_tuple = (x, y)
# note: the batch size here is hard-coded to 8 and ignores args.batch_size
train_loader = DataLoader(TensorDataset(*data_tuple),
                          batch_size=8, shuffle=False, drop_last=True)
# SGD with momentum
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=args.lr,
#                        weight_decay=args.weight_decay)
scheduler = OneCycleScheduler(lr_max=args.lr, div_factor=args.lr_div,
                              pct_start=args.lr_pct)
# scheduler = ReduceLROnPlateau(optimizer, 'min', patience=30, factor=0.8, cooldown=4, min_lr=1e-6, verbose=True)
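# The one-cycle schedule is driven manually inside the training loop: each
# batch maps its global step to pct = step / total_steps in [0, 1], asks the
# scheduler for the lr at that point, and applies it with
# adjust_learning_rate (see the lr scheduling lines in the loop below).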
start_epoch = 0
if train_flag == 1:
    # keep only the weights whose names match the current architecture
    state_dict = {}
    for k, v in checkpoint['model_state_dict'].items():
        if k in model.state_dict().keys():
            state_dict[k] = v
    # strict=False: tolerate keys missing from the filtered dict
    model.load_state_dict(state_dict, strict=False)
    # model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    print(start_epoch, loss)
logger = {'loss_train': [], 'u_l2loss': [], 'u_l2loss_ref': [], 'u_l2loss_randref': []}
print('Start training...................................................')
# start_epoch = 1 if args.ckpt_epoch is None else args.ckpt_epoch + 1
tic = time.time()
total_steps = args.epochs * len(train_loader)
print(f'total steps: {total_steps}')
def val_function(input_data_val, input_data_val_cpu, ref_val, flag):
    output_val = model(input_data_val)
    output_val = output_val.cpu()
    # relative L2 error of the predicted displacement fields
    err2_sum2 = torch.sum((output_val - ref_val) ** 2, [2, 3, 4])
    ref_val_2 = (ref_val ** 2).sum([2, 3, 4])
    relative_l2 = torch.sqrt(err2_sum2 / ref_val_2)
    relative_u_ref = relative_l2.mean()
    if flag:
        logger['u_l2loss_ref'].append(relative_u_ref.item())
    else:
        logger['u_l2loss_randref'].append(relative_u_ref.item())
    # validate the homogenized C as well
    DHs_np = compute_DH(input_data_val_cpu, output_val)
    return DHs_np, relative_u_ref
def disp2DH(input, output, D0, intB, h):
    pp = input.contiguous().view(input.shape[0], -1, 1, 1)
    DD = pp * D0                      # [batch, nele, 6, 6]
    tmp = torch.matmul(intB, output)  # [batch, nele, 6, 6]
    DDh = DD * (h ** 3)
    tmp2 = DDh - torch.matmul(DD, tmp)
    DHs = tmp2.sum(1)                 # sum over elements
    return DHs
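
# disp2DH is the batched, differentiable counterpart of compute_DH: both
# evaluate DH = sum_e rho_e * D0 @ (h^3 * I - intB @ u_e), here with rho
# broadcast over the per-element 6x6 blocks and the sum taken over dim 1.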
# from apex import amp
# model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
epoch_ts = []
for epoch in range(start_epoch + 1, args.epochs + 1):
    epoch_start_t = time.time()
    model.train()
    relative_l2 = []
    loss_train, mse = 0., 0.
    for batch_idx, (input, target) in enumerate(train_loader, start=1):
        input = input.to(device)
        # target = target.to(device)
        model.zero_grad()
        output = model(input)
        # physics-driven energy loss; the labeled target is not used here
        # loss_m = macro3d_KU_F(input, target, device, Ke, Fe, edofMat)
        loss_m = macro3d_conv_energy(input, output, device, Ke, Fe, edofMat)
        # loss_m = F.mse_loss(output, target)
        loss = loss_m
        # with amp.scale_loss(loss, optimizer) as scaled_loss:
        #     scaled_loss.backward()
        loss.backward()
        # err2_sum2 = torch.sum((output - target) ** 2, [2, 3, 4])
        # relative_l2.append(torch.sqrt(err2_sum2 / (target ** 2).sum([2, 3, 4])))
        # one-cycle lr scheduling, stepped once per batch
        step = (epoch - 1) * len(train_loader) + batch_idx
        pct = step / total_steps
        lr = scheduler.step(pct)
        adjust_learning_rate(optimizer, lr)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 4)
        optimizer.step()
        loss_train += loss.item()
    epoch_end_t = time.time()
    epoch_ts.append(epoch_end_t - epoch_start_t)
# print(f"time: {epoch_end_t - epoch_start_t}")
loss_train /= batch_idx
# relative_l2 = to_numpy(torch.cat(relative_l2, 0).mean(0))
# relative_u = relative_l2.mean()
# ur = relative_u
logger['loss_train'].append(loss_train)
# logger['u_l2loss'].append(relative_u)
# print(f'Epoch {epoch}, lr {lr:.6f}')
print(f'Epoch {epoch}: training loss: {loss_train:.6f}')
# print(f'Epoch {epoch}: training loss: {loss_train:.6f}, '\
# f'relative-error: {relative_u: .5f}')
# target = target.cpu().detach().numpy()
# output = output.cpu().detach().numpy()
# re = np.array((output - target) / target, dtype=np.float32)
# print(f'val-C-mean:{abs(re).mean(): .5}, val-C-max:{abs(re).max(): .5f}')
with open(args.train_dir+"/running_log.txt",'a') as logf:
logf.write(f'Epoch {epoch}: training loss: {loss_train:.6f}\n')
# logf.write(f'Epoch {epoch}: training loss: {loss_train:.6f}, relative-error: {relative_u: .5f}\n')
    if epoch % args.log_freq == 0:
        model.eval()
        # evaluate the homogenized elasticity tensor on the training set
        with torch.no_grad():
            DHS_list = []
            for batch_idx, (input, target) in enumerate(train_loader, start=1):
                input = input.to(device)
                output = model(input)
                batch_size = input.shape[0]
                # regroup the 18 channels into per-load-case nodal DOF vectors,
                # then gather element displacements via edofMat (as in compute_DH)
                ref18 = output.contiguous().view(batch_size, 18, -1)
                map0 = ref18[:, 0::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                map1 = ref18[:, 1::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                map2 = ref18[:, 2::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                map3 = ref18[:, 3::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                map4 = ref18[:, 4::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                map5 = ref18[:, 5::6].permute((0, 2, 1)).contiguous().view(batch_size, -1, 1)
                output_map = torch.cat([map0, map1, map2, map3, map4, map5], 2)
                output_U = output_map[:, edofMat, :]
                DHs = disp2DH(input, output_U, D00, intB2, h)
                DHS_list.append(DHs.cpu().numpy())
            DHs_np = np.concatenate(DHS_list, 0)
            torch.cuda.empty_cache()
            # relative error of the 12 tracked DH entries, skipping
            # near-zero reference values
            o2 = DHs_np.reshape(-1, 36)[:, dh_idx12]
            re = (o2 - y2) / y2
            re[abs(y2) < bound] = 0
            ret = abs(re.reshape(-1))
            print(f'train-C-mean:{ret.mean(): .5}')
    if epoch % args.ckpt_freq == 0:
        torch_savemodel(model, epoch, optimizer, loss_train,
                        args.ckpt_dir + "/model_epoch{}.pth".format(epoch))
print(f"mean-time: {np.array(epoch_ts[1:]).mean()}")
tic2 = time.time()
print(f'Finished training {args.epochs} epochs with {args.ntrain} data ' \
f'using {(tic2 - tic) / 60:.2f} mins')
metrics = ['loss_train', 'u_l2loss', 'u_l2loss_ref', 'u_l2loss_randref']
save_stats(args.train_dir, logger, *metrics)
# args.training_time = tic2 - tic
# args.n_params, args.n_layers = model.model_size
# with open(args.run_dir + "/args.txt", 'w') as args_file:
#     json.dump(vars(args), args_file, indent=4)