import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.DataFrame(data.target, columns=['target'])

# Convert to tensors: X is [N, 30] float, y is [N, 1] float
X = torch.tensor(data.data).float()
y = torch.tensor(data.target).reshape(-1, 1).float()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


class MyDataSet(Dataset):
    """Wrap the feature/label tensors as a PyTorch dataset."""

    def __init__(self, X, y):
        self.X = X
        self.y = y.reshape(-1)   # CrossEntropyLoss expects class indices of shape [N]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Features as float, labels as long (class indices)
        return self.X[idx].float(), self.y[idx].long()


train_dataset = MyDataSet(X=X_train, y=y_train)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)

test_dataset = MyDataSet(X=X_test, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=256)

# Number of training epochs
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"


# Monitoring helper: mean accuracy over a dataloader
def get_acc(dataloader=train_dataloader, model=None):
    accs = []
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=1)
            acc = (y_pred == y).float().mean().item()
            accs.append(acc)
    return np.array(accs).mean()


# Training loop
def train(model, epochs=epochs, train_dataloader=train_dataloader, test_dataloader=test_dataloader):
    model.to(device=device)
    optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}:")
        model.train()
        for X, y in train_dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X)          # forward pass
            optimizer.zero_grad()      # clear gradients
            loss = loss_fn(y_pred, y)  # compute loss
            loss.backward()            # back-propagate
            optimizer.step()           # update parameters
        print(f"train_acc: {get_acc(dataloader=train_dataloader, model=model)}, "
              f"test_acc: {get_acc(dataloader=test_dataloader, model=model)}")
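The later cells import this harness with from T import train, which suggests the code above is saved as a module named T.py. A minimal, hypothetical smoke test of the harness with a plain linear baseline (not part of the original notebook):

from torch import nn
from T import train, X_train

# Hypothetical baseline: one linear layer on the 30 raw features.
baseline = nn.Linear(in_features=X_train.shape[1], out_features=2)
train(baseline, epochs=5)   # a few epochs are enough to verify the loop runs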
from T import train, X_test
import torch
from torch import nn
from torch.nn import functional as F


class TextCNNML(nn.Module):
    """TextCNN variant: multi-scale (2-, 3- and 4-gram) convolutions."""

    def __init__(self, in_features, out_features, L):
        super().__init__()
        # [N, C, seq_len] -> [N, C, seq_len - kernel_size + 1]
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=2),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 1),   # global max pool over the sequence -> [N, C, 1]
        )
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=3),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 2),   # [N, C, 1]
        )
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=4),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 3),   # [N, C, 1]
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(in_features=256 * 3, out_features=out_features)

    def forward(self, X):
        # [B, seq_len] -> [B, 1, seq_len]: treat the 30 features as one-channel sequences
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)                               # [B, 256, 1]
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        x = torch.concat(tensors=(x1, x2, x3), dim=1)     # [B, 768, 1]
        x = torch.squeeze(x)                              # [B, 768]
        x = self.dropout1(x)
        x = self.fc1(x)
        return x


model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
# Final result: train_acc: 0.9159881174564362, test_acc: 0.9473684430122375
train(model)

....
....
....
Epoch 195:
train_acc: 0.8776958584785461, test_acc: 0.9210526347160339
Epoch 196:
train_acc: 0.9023162424564362, test_acc: 0.9473684430122375
Epoch 197:
train_acc: 0.9089458584785461, test_acc: 0.9385964870452881
Epoch 198:
train_acc: 0.9042693674564362, test_acc: 0.9385964870452881
Epoch 199:
train_acc: 0.9077629745006561, test_acc: 0.9473684430122375
Epoch 200:
train_acc: 0.9159881174564362, test_acc: 0.9473684430122375
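A note on the classifier head in the model above: each n-gram branch ends in a global max pool that collapses the sequence axis to length 1, so the three [B, 256, 1] outputs concatenate to [B, 768, 1] and squeeze to [B, 768], which is why fc1 takes 256 * 3 inputs. A quick, hypothetical shape check (not part of the original notebook):

import torch

dummy = torch.randn(4, 30)                               # 4 samples, 30 breast-cancer features
out = TextCNNML(in_features=1, out_features=2, L=30)(dummy)
print(out.shape)                                         # torch.Size([4, 2])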
|
|
|
from T import train, X_test
import torch
from torch import nn
from torch.nn import functional as F


class TextCNNML(nn.Module):
    """TextCNN variant: multi-scale n-gram convolutions followed by extra conv blocks."""

    def __init__(self, in_features, out_features, L):
        super().__init__()
        hidden_size = 256
        # [N, C, seq_len] -> [N, C, seq_len - kernel_size + 1], then halved by max pooling
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=2),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=4),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * 10, out_features=out_features)

    def forward(self, X):
        # [B, seq_len] -> [B, 1, seq_len]
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Concatenate along the sequence axis: [114, 256, 41] for the test set
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        x = self.fc1(x)
        return x


model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
model(X_test)[:3]

tensor([[-0.2633,  0.0193],
        [-0.3391,  0.9590],
        [-0.4151,  0.3640]], grad_fn=<SliceBackward0>)

train(model)

Convergence is extremely fast:

train_acc: 0.37241417169570923, test_acc: 0.37719297409057617
Epoch 2:
train_acc: 0.5244278162717819, test_acc: 0.5
Epoch 3:
train_acc: 0.6502530723810196, test_acc: 0.7105263471603394
Epoch 4:
train_acc: 0.8698833584785461, test_acc: 0.8947368264198303
Epoch 5:
train_acc: 0.8687004745006561, test_acc: 0.8947368264198303
Epoch 6:
train_acc: 0.907405361533165, test_acc: 0.9385964870452881
....
After that it stabilizes at a little over 94%:
....
train_acc: 0.92536860704422, test_acc: 0.9473684430122375
Epoch 194:
train_acc: 0.924983486533165, test_acc: 0.9298245906829834
Epoch 195:
train_acc: 0.9198943674564362, test_acc: 0.9473684430122375
Epoch 196:
train_acc: 0.9112841039896011, test_acc: 0.9649122953414917
Epoch 197:
train_acc: 0.92264524102211, test_acc: 0.9473684430122375
Epoch 198:
train_acc: 0.9144201129674911, test_acc: 0.9473684430122375
Epoch 199:
train_acc: 0.921077236533165, test_acc: 0.9473684430122375
Epoch 200:
train_acc: 0.9144201129674911, test_acc: 0.9473684430122375
....
Test accuracy reaches 98% around epoch 400:
....
Epoch 379:
train_acc: 0.9206646084785461, test_acc: 0.9824561476707458
Epoch 380:
train_acc: 0.9238006174564362, test_acc: 0.9824561476707458
Epoch 381:
train_acc: 0.9238006174564362, test_acc: 0.9824561476707458
Epoch 382:
train_acc: 0.9136223495006561, test_acc: 0.9736841917037964
Epoch 383:
train_acc: 0.9202794879674911, test_acc: 0.9824561476707458
....
After that it starts to fluctuate:
....
train_acc: 0.93045774102211, test_acc: 0.9824561476707458
Epoch 989:
train_acc: 0.9175285995006561, test_acc: 0.9912281036376953
Epoch 990:
train_acc: 0.928889736533165, test_acc: 0.9736841917037964
Epoch 991:
train_acc: 0.94022336602211, test_acc: 0.9824561476707458
Epoch 992:
train_acc: 0.9343364834785461, test_acc: 0.9824561476707458
Epoch 993:
train_acc: 0.9030864834785461, test_acc: 0.9385964870452881
Epoch 994:
train_acc: 0.92146235704422, test_acc: 0.9473684430122375
Epoch 995:
train_acc: 0.9343364834785461, test_acc: 0.9824561476707458
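Where the hidden_size * 10 input size of fc1 in the model above comes from: a hypothetical trace of the sequence length, assuming the 30 breast-cancer features (not part of the original notebook):

# Hypothetical length trace through the model above (seq_len = 30):
seq_len = 30
grams = [(seq_len - k + 1) // 2 for k in (2, 3, 4)]   # max pool halves each n-gram branch: [14, 14, 13]
L = sum(grams)                                        # 41 after concatenating on the sequence axis
L = L // 2                                            # 20 after conv1's max pool
L = L // 2                                            # 10 after conv2's max pool
print(L * 256)                                        # 2560 = hidden_size * 10 inputs to fc1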
|
|
|
|
from T import train, X_test
import torch
from torch import nn
from torch.nn import functional as F


class SmallBlock1d(nn.Module):
    """Simple 1-D residual block: a small conv stack plus a skip connection."""

    def __init__(self, in_channel, hidden_size):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=in_channel, out_channels=hidden_size * 4, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size * 4),
            nn.Conv1d(in_channels=hidden_size * 4, out_channels=hidden_size * 4, kernel_size=3, stride=1, padding=1),
            nn.Conv1d(in_channels=hidden_size * 4, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

    def forward(self, x):
        # Residual connection: requires hidden_size == in_channel so the shapes match
        return x + self.conv1(x)


class TextCNNML(nn.Module):
    """TextCNN variant: multi-scale n-gram convolutions + a deep stack of residual conv blocks."""

    def __init__(self, in_features, out_features, L):
        super().__init__()
        hidden_size = 256
        # [N, C, seq_len] -> [N, C, seq_len - kernel_size + 1], then halved by max pooling
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=2),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=4),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        # 60 stacked residual blocks (originally written out by hand as self.s1 ... self.s60)
        self.blocks = nn.Sequential(
            *[SmallBlock1d(in_channel=hidden_size, hidden_size=hidden_size) for _ in range(60)]
        )
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * 10, out_features=out_features)

    def forward(self, X):
        # [B, seq_len] -> [B, 1, seq_len]
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Concatenate along the sequence axis: [114, 256, 41] for the test set
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        x = self.blocks(x)      # apply the 60 residual blocks in order
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        x = self.fc1(x)
        return x


model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
model(X_test)[:3]

tensor([[-0.3637, -0.4065],
        [ 0.2695,  1.0281],
        [ 0.3592,  0.0745]], grad_fn=<SliceBackward0>)
train(model)

There was previously obvious underfitting. After changing the convolution stack inside the block to hidden_size -> hidden_size * 4 -> hidden_size (as below), the underfitting improved somewhat, probably because the network became more expressive:

self.conv1 = nn.Sequential(
    nn.Conv1d(in_channels=256, out_channels=hidden_size * 4, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm1d(num_features=hidden_size * 4),
    nn.Conv1d(in_channels=hidden_size * 4, out_channels=hidden_size * 4, kernel_size=3, stride=1, padding=1),
    nn.Conv1d(in_channels=hidden_size * 4, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
)

Epoch 236:
train_acc: 0.939838245511055, test_acc: 0.9736841917037964
Epoch 237:
train_acc: 0.94803586602211, test_acc: 0.9649122953414917
Epoch 238:
train_acc: 0.9413787424564362, test_acc: 0.9561403393745422
Epoch 239:
train_acc: 0.94099360704422, test_acc: 0.9736841917037964
......
Stability also improved slightly:
......
Epoch 266:
train_acc: 0.9308153539896011, test_acc: 0.9736841917037964
Epoch 267:
train_acc: 0.94608274102211, test_acc: 0.9649122953414917
Epoch 268:
train_acc: 0.951556995511055, test_acc: 0.9649122953414917

Test accuracy eventually stabilized around 98%, but there is still some underfitting. Training accuracy did reach 95% at points; with the more complex network, many more training epochs are probably needed, likely several thousand.

Epoch 492:
train_acc: 0.928889736533165, test_acc: 0.9824561476707458
Epoch 493:
train_acc: 0.92341548204422, test_acc: 0.9824561476707458
Epoch 494:
train_acc: 0.9183263629674911, test_acc: 0.9824561476707458
Epoch 495:
train_acc: 0.9194817245006561, test_acc: 0.9824561476707458
Epoch 496:
train_acc: 0.9167583584785461, test_acc: 0.9824561476707458
Epoch 497:
train_acc: 0.923030361533165, test_acc: 0.9385964870452881
Epoch 498:
train_acc: 0.91950923204422, test_acc: 0.9824561476707458
Epoch 499:
train_acc: 0.9222326129674911, test_acc: 0.9824561476707458
Epoch 500:
train_acc: 0.9198943674564362, test_acc: 0.9824561476707458
from T import train, X_test
import torch
from torch import nn
from tpf.mlib.seq import ShortBlock1d


class TextCNNML(nn.Module):
    """TextCNN variant: multi-scale n-gram convolutions + conv blocks, with the
    residual stack factored out into ShortBlock1d."""

    def __init__(self, in_features, out_features, L):
        super().__init__()
        hidden_size = 256
        # [N, C, seq_len] -> [N, C, seq_len - kernel_size + 1], then halved by max pooling
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=2),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=4),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.short = ShortBlock1d(in_channel=hidden_size, hidden_size=hidden_size)
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * 10, out_features=out_features)

    def forward(self, X):
        # [B, seq_len] -> [B, 1, seq_len]
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Concatenate along the sequence axis: [114, 256, 41] for the test set
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        x = self.short(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        x = self.fc1(x)
        return x


model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
model(X_test)[:3]

tensor([[-0.0809, -0.0281],
        [-0.4714, -0.6860],
        [-0.3987, -0.4960]], grad_fn=<SliceBackward0>)
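ShortBlock1d comes from the author's own tpf.mlib.seq helper package, so its definition is not shown here. Judging from how it is used (same in/out channel count, dropped in between the n-gram branches and the conv blocks), it presumably wraps the residual stack from the previous experiment; a minimal sketch under that assumption (hypothetical, not the actual library code):

import torch
from torch import nn


class ShortBlock1dSketch(nn.Module):
    """Hypothetical stand-in for tpf.mlib.seq.ShortBlock1d: a stack of 1-D
    residual conv blocks that preserves the [N, C, L] shape."""

    def __init__(self, in_channel, hidden_size, num_blocks=60):
        super().__init__()

        def block():
            return nn.Sequential(
                nn.Conv1d(in_channel, hidden_size * 4, kernel_size=3, padding=1),
                nn.BatchNorm1d(hidden_size * 4),
                nn.Conv1d(hidden_size * 4, hidden_size, kernel_size=3, padding=1),
                nn.ReLU(),
            )

        self.blocks = nn.ModuleList(block() for _ in range(num_blocks))

    def forward(self, x):
        for blk in self.blocks:
            x = x + blk(x)   # residual connection per block
        return x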
|
|
Here the data are transaction data. Kernel sizes k = 2, 3, 4 effectively merge 2, 3 or 4 adjacent features, but they do not distinguish which features matter more. So attention is added: the attention mechanism scores the features and separates out their relative importance. Attention divides up local importance from the perspective of the whole:
- merged into one, they form the whole;
- once separated, the hierarchy of importance becomes clear.
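A toy illustration of that idea (hypothetical example, not from the original notebook): scaled dot-product attention computes a softmax weight for every position, so each feature's contribution to the re-weighted output becomes explicit.

import torch

# Hypothetical toy example: one sample, 5 feature positions, 4 dims each.
x = torch.randn(1, 5, 4)

# Scaled dot-product attention with Q = K = V = x.
score = x @ x.transpose(1, 2) / x.shape[-1] ** 0.5   # [1, 5, 5] pairwise scores
weight = torch.softmax(score, dim=-1)                # each row sums to 1
out = weight @ x                                     # positions re-weighted by importance

print(weight[0].sum(dim=-1))   # tensor([1., 1., 1., 1., 1.])
print(out.shape)               # torch.Size([1, 5, 4])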
from T import train, X_test
import torch
from torch import nn
from torch.nn import functional as F


class TextCNNML(nn.Module):
    """TextCNN variant: multi-scale n-gram convolutions + conv blocks + attention."""

    def __init__(self, in_features, out_features, seq_len):
        super().__init__()
        hidden_size = 256
        # [N, C, seq_len] -> [N, C, seq_len + 2*padding - kernel_size + 1], then halved by max pooling
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=2, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=4, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )
        # Projects the attention branch from in_features channels up to hidden_size
        self.conv3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        # Length after concatenating the three pooled n-gram branches plus the attention
        # branch, then two halvings by conv1/conv2, times hidden_size channels
        ll = ((seq_len + 2 * 1) - 2 + 1) // 2 + ((seq_len + 2 * 1) - 3 + 1) // 2 + ((seq_len + 2 * 1) - 4 + 1) // 2 + seq_len
        ll = (ll // 4) * 256
        print(ll)
        self.fc1 = nn.Linear(in_features=ll, out_features=hidden_size)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=out_features)

    def attention(self, Q, K, V, mask=None, multihead=False):
        """Scaled dot-product attention for sequence data.

        - Expects [batch_size, seq_len, embedding_dim]-style input: the feature
          dimension to be transformed is the last axis, the sequence axis is the
          second-to-last.
        - 4-D input means the embedding has already been split into heads
          (multi-head attention); 3-D input is single-head.

        params
        ------
        Q, K, V : query / key / value tensors
        mask    : positions that are True are filled with -inf before the softmax
        multihead : if True, merge the heads back into embedding_dim at the end
        """
        # Example shapes: b sentences, 50 tokens, 32-dim embeddings split into
        # 4 heads of 8 dims each -> Q, K, V = [b, 4, 50, 8]
        n_shape = len(Q.shape)

        # Q @ K^T gives each position's attention over all other positions
        if n_shape == 4:    # embedding split across heads: multi-head attention
            seq_len = Q.shape[2]
            score = torch.matmul(Q, K.permute(0, 1, 3, 2))   # [b, 4, 50, 50]
        elif n_shape == 3:
            seq_len = Q.shape[1]
            score = torch.matmul(Q, K.permute(0, 2, 1))
        else:
            raise Exception(f"only 3 or 4 dim, now is {n_shape}")

        # Scale by the square root of the per-head dimension
        k = Q.shape[-1]
        score /= k ** 0.5

        # Masked positions are set to -inf so the softmax pushes them to 0
        # mask = [b, 1, seq_len, seq_len]
        if mask is not None:
            score = score.masked_fill_(mask, -float('inf'))
        score = torch.softmax(score, dim=-1)

        # Multiply the attention weights with V
        # [b, 4, 50, 50] @ [b, 4, 50, 8] -> [b, 4, 50, 8]
        score = torch.matmul(score, V)

        # Merge the heads back into one embedding: [b, 4, 50, 8] -> [b, 50, 32]
        if multihead:
            head_n = Q.shape[1]
            k = Q.shape[-1]
            embedding_dim = head_n * k
            score = score.permute(0, 2, 1, 3).reshape(-1, seq_len, embedding_dim)
        return score

    def forward(self, X):
        # [B, seq_len] -> [B, 1, seq_len]
        seq_len = X.shape[1]
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        x4 = self.attention(x, x, x)   # attention branch on the raw features
        x4 = self.conv3(x4)
        # Concatenate all four branches along the sequence axis
        x = torch.concat(tensors=(x1, x2, x3, x4), dim=2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        x = self.fc1(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x


model = TextCNNML(in_features=1, out_features=2, seq_len=X_test.shape[1])
model(X_test)[:3]
30000 epochs of training; each epoch runs quickly.

train(model)

This is the first time in these deep-learning experiments that training accuracy stably reaches 96%:

Epoch 14361:
train_acc: 0.95975461602211, test_acc: 0.9649122953414917
Epoch 14371:
train_acc: 0.969135120511055, test_acc: 0.9736841917037964
Epoch 14381:
train_acc: 0.964045986533165, test_acc: 0.9736841917037964
Epoch 14391:
train_acc: 0.9554357379674911, test_acc: 0.9649122953414917
Epoch 14401:
train_acc: 0.965228870511055, test_acc: 0.9649122953414917
Epoch 14411:
train_acc: 0.964045986533165, test_acc: 0.9649122953414917
Epoch 14421:
train_acc: 0.962092861533165, test_acc: 0.9561403393745422

It occasionally dips:

Epoch 14701:
train_acc: 0.9550506174564362, test_acc: 0.9561403393745422
Epoch 14711:
train_acc: 0.9050396084785461, test_acc: 0.9298245906829834
Epoch 14721:
train_acc: 0.966796875, test_acc: 0.9736841917037964
Epoch 14731:
train_acc: 0.9589568674564362, test_acc: 0.9649122953414917
Epoch 14741:
train_acc: 0.962092861533165, test_acc: 0.9649122953414917
Epoch 14751:
train_acc: 0.96366086602211, test_acc: 0.9649122953414917
....
....
....
Epoch 16111:
train_acc: 0.961322620511055, test_acc: 0.9824561476707458
Epoch 16121:
train_acc: 0.97147336602211, test_acc: 0.9736841917037964
Epoch 16131:
train_acc: 0.9179412424564362, test_acc: 0.9298245906829834
Epoch 16141:
train_acc: 0.960139736533165, test_acc: 0.9824561476707458
Epoch 16151:
train_acc: 0.94489985704422, test_acc: 0.9473684430122375
Epoch 16161:
train_acc: 0.9511443674564362, test_acc: 0.9561403393745422

Occasionally training accuracy reaches 97-98%:

Epoch 20081:
train_acc: 0.962092861533165, test_acc: 0.9649122953414917
Epoch 20091:
train_acc: 0.97265625, test_acc: 0.9736841917037964
Epoch 20101:
train_acc: 0.9558208584785461, test_acc: 0.9649122953414917
Epoch 20111:
train_acc: 0.964045986533165, test_acc: 0.9649122953414917
....
....
....
Epoch 26851:
train_acc: 0.973041370511055, test_acc: 0.9649122953414917
Epoch 26861:
train_acc: 0.96443110704422, test_acc: 0.9649122953414917
Epoch 26871:
train_acc: 0.958186611533165, test_acc: 0.9649122953414917
Epoch 26881:
train_acc: 0.98046875, test_acc: 0.9736841917037964
Epoch 26891:
train_acc: 0.967181995511055, test_acc: 0.9736841917037964

By epoch 30000 it feels as if this last fluctuation has not yet settled:
- accuracy could probably rise a bit more with 50000 epochs;
- the underfitting problem has basically disappeared;
- the 30000-epoch run took roughly 20-odd minutes.

Epoch 29911:
train_acc: 0.96952024102211, test_acc: 0.9649122953414917
Epoch 29921:
train_acc: 0.97537961602211, test_acc: 0.9561403393745422
Epoch 29931:
train_acc: 0.967952236533165, test_acc: 0.9649122953414917
Epoch 29941:
train_acc: 0.965999111533165, test_acc: 0.9736841917037964
Epoch 29951:
train_acc: 0.97029048204422, test_acc: 0.9561403393745422
Epoch 29961:
train_acc: 0.96443110704422, test_acc: 0.9824561476707458
Epoch 29971:
train_acc: 0.97147336602211, test_acc: 0.9561403393745422
Epoch 29981:
train_acc: 0.954280361533165, test_acc: 0.9473684430122375
Epoch 29991:
train_acc: 0.96247798204422, test_acc: 0.9649122953414917
|
|
How to measure program running time in Python
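A minimal sketch of one common approach, using only the standard library's time.perf_counter; this is an illustration, not code from the original notebook, and the timed body below is just a placeholder workload:

import time

start = time.perf_counter()

# ... the code to be timed would go here, e.g. train(model) ...
total = sum(i * i for i in range(1_000_000))   # placeholder workload

elapsed = time.perf_counter() - start
print(f"elapsed: {elapsed:.3f} s")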