Understanding LSTM Networks http://colah.github.io/posts/2015-08-Understanding-LSTMs/
The idea behind RNNs is good: an event has three phases, past, present, and future, and an RNN is pinned to the present step. The problem is that too much happens at any given step, and an RNN cannot take the global view (past, present, and future) needed to judge an event:

- which present moments matter to the whole
- which points are the key nodes of the whole

Attention, which came later, solves exactly this problem: picking out the important information. So can the RNN's output be combined with attention, and would that give better results?
class TextRNN1(nn.Module):
    """Sentiment analysis with a single-layer bidirectional RNN."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        # 2 = num_layers (1) * num_directions (2)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -> [seq_len, b, 256] (nn.RNN defaults to batch_first=False)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn: [2, b, 512] -- sum the two directions' final states -> [b, 512]
        # hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out


class TextRNN2(nn.Module):
    """Unidirectional RNN; classifies from the output sequence summed over time."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN2, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=256, hidden_size=512)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        # keep h0 on the same device as the model and inputs
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -> [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        # out: [seq_len, b, 512]
        out, hn = self.rnn(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        # sum over the time dimension -> [b, 512]
        out = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out
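TextRNN1 pools the final hidden state hn, while TextRNN2 pools the full output sequence out. The difference is easiest to see from what nn.RNN returns; a quick standalone check with dummy sizes matching the models above:

import torch
from torch import nn

rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
x = torch.randn(100, 4, 256)   # [seq_len, batch, input_size]
h0 = torch.zeros(2, 4, 512)    # [num_layers * num_directions, batch, hidden_size]
out, hn = rnn(x, h0)
print(out.shape)  # torch.Size([100, 4, 1024]) -- per-step output, both directions concatenated
print(hn.shape)   # torch.Size([2, 4, 512])    -- final hidden state per direction

Note that in the bidirectional case the per-step features are 1024-wide; summing hn over its first dimension keeps the classifier input 512-wide, whereas pooling out would hand fc a 1024-wide vector.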
class TextRNN3(nn.Module):
    """Sentiment analysis with a two-layer bidirectional LSTM."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN3, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.lstm = nn.LSTM(input_size=256, hidden_size=512,
                            num_layers=2, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        # [b, seq_len, 256] -> [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        # [4, b, 512]: 4 = num_layers (2) * num_directions (2)
        h0 = torch.zeros(4, x.size(1), 512).float().to(device=device)
        c0 = torch.zeros(4, x.size(1), 512).float().to(device=device)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # out = torch.sum(input=out, dim=0)
        # cn = torch.squeeze(input=cn, dim=0)
        # cn: [4, b, 512] -- sum the final cell states of all layers/directions -> [b, 512]
        cn = torch.sum(input=cn, dim=0)
        # hn = hn[-1]
        out = self.fc(cn)
        return out
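With num_layers=2 and bidirectional=True the LSTM's state tensors have a leading dimension of 4, which is what the torch.zeros(4, ...) above accounts for; summing cn over that dimension therefore mixes the final cell states of both layers and both directions. A standalone shape check:

import torch
from torch import nn

lstm = nn.LSTM(input_size=256, hidden_size=512, num_layers=2, bidirectional=True)
x = torch.randn(100, 4, 256)   # [seq_len, batch, input_size]
h0 = torch.zeros(4, 4, 512)    # [num_layers * num_directions, batch, hidden_size]
c0 = torch.zeros(4, 4, 512)
out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape)               # torch.Size([100, 4, 1024])
print(hn.shape, cn.shape)      # torch.Size([4, 4, 512]) torch.Size([4, 4, 512])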
class TextRNN4(nn.Module):
    """Sentiment analysis with a bidirectional GRU."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.gru = nn.GRU(input_size=256, hidden_size=512, bidirectional=True)
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        # hn: [2, b, 512] -- sum the two directions' final states -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc1(hn)
        out = F.relu(out)
        out = self.fc2(out)
        return out
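At the same sizes, RNN, LSTM, and GRU cells differ only in how many gate blocks they carry (1, 4, and 3 respectively), so their parameter counts sit in an exact 1:4:3 ratio. A quick comparison at the dimensions used here:

import torch
from torch import nn

for name, m in [("RNN",  nn.RNN(input_size=256, hidden_size=512)),
                ("LSTM", nn.LSTM(input_size=256, hidden_size=512)),
                ("GRU",  nn.GRU(input_size=256, hidden_size=512))]:
    print(name, sum(p.numel() for p in m.parameters()))
# RNN  394240    (one tanh block)
# LSTM 1576960   (4 blocks: input, forget, cell, output gates) = exactly 4x
# GRU  1182720   (3 blocks: reset, update, candidate)          = exactly 3x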
import os
from tpf import pkl_save, pkl_load

BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR, 'data_pkl/word.pkl')
X_train, y_train, X_test, y_test, words_set, word2idx, idx2word = pkl_load(file_path)

# Vocabulary size
dict_len = len(words_set)
# Sequence length
seq_len = 512

# Pad/truncate the training set to seq_len tokens
X_train1 = []
for x in X_train:
    temp = x + ["<PAD>"] * seq_len
    X_train1.append(temp[:seq_len])

# Pad/truncate the test set
X_test1 = []
for x in X_test:
    temp = x + ["<PAD>"] * seq_len
    X_test1.append(temp[:seq_len])

"""
Map tokens to vocabulary indices
"""
# Training set
X_train2 = []
for x in X_train1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_train2.append(temp)

# Test set
X_test2 = []
for x in X_test1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_test2.append(temp)

"""
Build the datasets
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F

class MyDataSet(Dataset):
    def __init__(self, X=X_train2, y=y_train):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train2, y=y_train)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)
test_dataset = MyDataSet(X=X_test2, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=256)

train_dataset[0][0][:7]
tensor([ 5321,   706, 20311, 14111,  1036,   823, 14111])

class TextRNN1(nn.Module):
    """Sentiment analysis with a single-layer bidirectional RNN."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -> [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out

model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])

# Number of training epochs
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
# Loss function
loss_fn = nn.CrossEntropyLoss()

# Monitoring: mean accuracy over a dataloader
def get_acc(dataloader=train_dataloader, model=model):
    accs = []
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X)
            y_pred = y_pred.argmax(dim=1)
            acc = (y_pred == y).float().mean().item()
            accs.append(acc)
    return np.array(accs).mean()

# Training loop
def train(model=model, optimizer=optimizer, loss_fn=loss_fn, epochs=epochs,
          train_dataloader=train_dataloader, test_dataloader=test_dataloader):
    model.to(device=device)
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}:")
        model.train()
        for X, y in train_dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            # Forward pass
            y_pred = model(X)
            # Clear old gradients
            optimizer.zero_grad()
            # Compute the loss
            loss = loss_fn(y_pred, y)
            # Backpropagate
            loss.backward()
            # Take one optimizer step
            optimizer.step()
        print(f"train_acc: {get_acc(dataloader=train_dataloader)}, test_acc: {get_acc(dataloader=test_dataloader)}")

train()

Only about 76% test accuracy even at epoch 200, and the gains have slowed to a crawl; the model's expressive capacity is limited.

Epoch 194: train_acc: 0.8244243421052632, test_acc: 0.7652698874473571
Epoch 195: train_acc: 0.8225740131578947, test_acc: 0.7633522748947144
Epoch 196: train_acc: 0.8270970394736842, test_acc: 0.7621448874473572
Epoch 197: train_acc: 0.8283305921052632, test_acc: 0.7641335248947143
Epoch 198: train_acc: 0.8279194078947368, test_acc: 0.7641335248947143
Epoch 199: train_acc: 0.8279194078947368, test_acc: 0.7644886374473572
Epoch 200: train_acc: 0.8256578947368421, test_acc: 0.7644886374473572
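The pad-then-index loops in the script above are written out twice, once for the training set and once for the test set. A small helper (the name encode_batch is mine, not from the source) expresses the same pipeline once, with the same <PAD>/<UNK> conventions:

def encode_batch(texts, word2idx, seq_len):
    """Pad/truncate each token list to seq_len, then map tokens to indices."""
    unk = word2idx["<UNK>"]
    batch = []
    for tokens in texts:
        padded = (tokens + ["<PAD>"] * seq_len)[:seq_len]
        batch.append([word2idx.get(w, unk) for w in padded])
    return batch

# Equivalent to the four loops above:
# X_train2 = encode_batch(X_train, word2idx, seq_len)
# X_test2  = encode_batch(X_test,  word2idx, seq_len)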
from datasets import train_dataloader
from datasets import test_dataloader
from datasets import words_set, word2idx, idx2word
from T import train

import torch
from torch import nn

class TextRNN1(nn.Module):
    """Sentiment analysis with a single-layer bidirectional RNN."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -> [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Vocabulary size
dict_len = len(words_set)
model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
model.to(device=device)

# Optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
# Loss function
loss_fn = nn.CrossEntropyLoss()

train(model)
from T import words_set, word2idx, idx2word, dict_len, seq_len
from T import train

import torch
from torch import nn

device = "cuda:0" if torch.cuda.is_available() else "cpu"

class SingleRNNDefine(nn.Module):
    """A hand-written single-direction RNN cell unrolled over the sequence."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # [batch_size, embedding_dim] @ [embedding_dim, hidden_size] = [batch_size, hidden_size]
        self.cell_linear_x = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.cell_linear_h = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self, x, h0):
        seq_len, batch_size, embedding = x.shape
        output = []
        ht = h0[0]  # [batch_size, hidden_size]
        for t in range(seq_len):
            # Each time step consumes one token, so the seq_len dimension is handled
            # by the loop itself: [batch_size, embedding] -> [batch_size, hidden_size]
            each_word = self.cell_linear_x(x[t])
            ht = self.cell_linear_h(ht)
            ht = torch.tanh(each_word + ht)  # torch.Size([32, 512])
            # Caution: .tolist() copies ht to the CPU at every step and the rebuilt
            # tensor below falls out of the autograd graph (see the fix after this section)
            output.append(ht.tolist())
        hn = torch.unsqueeze(input=ht, dim=0)  # torch.Size([1, 32, 512])
        output = torch.Tensor(output)
        return output, hn

class TextRNN1(nn.Module):
    """Sentiment analysis with the hand-written RNN above."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = SingleRNNDefine(input_size=256, hidden_size=512)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        # [b, seq_len] -> [b, seq_len, 256]
        x = self.embed(x)
        # only h0[0] is used by the single-direction cell
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -> [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out

model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])

The code runs:

a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
# model(a)

But training is not only slow, the metrics never change:

train(model)
Epoch 1: train_acc: 0.4967105263157895, test_acc: 0.4947443246841431
Epoch 2: train_acc: 0.4995888157894737, test_acc: 0.4947443246841431
Epoch 3: train_acc: 0.49773848684210525, test_acc: 0.4947443246841431
Epoch 4: train_acc: 0.4985608552631579, test_acc: 0.4947443246841431
Epoch 5: train_acc: 0.4981496710526316, test_acc: 0.4947443246841431
Epoch 6: train_acc: 0.4985608552631579, test_acc: 0.4947443246841431
Epoch 7: train_acc: 0.4981496710526316, test_acc: 0.4947443246841431
Epoch 8: train_acc: 0.4979440789473684, test_acc: 0.4947443246841431
Epoch 9: train_acc: 0.4975328947368421, test_acc: 0.4947443246841431
Epoch 10: train_acc: 0.4967105263157895, test_acc: 0.4947443246841431
Epoch 11: train_acc: 0.49897203947368424, test_acc: 0.4947443246841431
Epoch 12: train_acc: 0.49712171052631576, test_acc: 0.4947443246841431
Epoch 13: train_acc: 0.49835526315789475, test_acc: 0.4947443246841431
Epoch 14: train_acc: 0.4995888157894737, test_acc: 0.4947443246841431
Epoch 15: train_acc: 0.4987664473684211, test_acc: 0.4947443246841431

I hit the same symptom last time when using BN with 1D convolutions ...
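Two things in SingleRNNDefine plausibly explain the symptoms. The per-step ht.tolist() forces a GPU-to-CPU copy 512 times per batch, which accounts for the slowness, and torch.Tensor(output) rebuilds the output sequence outside the autograd graph, so nothing downstream of output could ever backpropagate through it. The classifier here happens to use hn, which does stay in the graph, but after hundreds of trailing <PAD> steps the final state tends toward a fixed point that barely depends on the sentence, leaving little signal to learn from. A corrected sketch of the cell (my fix, not from the source) keeps every hidden state as a tensor and stacks them:

import torch
from torch import nn

class SingleRNNFixed(nn.Module):
    """Same interface as SingleRNNDefine, but autograd- and device-safe."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.cell_linear_x = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.cell_linear_h = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self, x, h0):
        # x: [seq_len, batch, input_size], h0: [1, batch, hidden_size]
        outputs = []
        ht = h0[0]
        for t in range(x.size(0)):
            ht = torch.tanh(self.cell_linear_x(x[t]) + self.cell_linear_h(ht))
            outputs.append(ht)             # keep the tensor: stays on-device and in-graph
        out = torch.stack(outputs, dim=0)  # [seq_len, batch, hidden_size]
        hn = ht.unsqueeze(0)               # [1, batch, hidden_size]
        return out, hn

With the outputs kept in the graph, the wrapper model could also pool out over time (as TextRNN2 does) instead of relying on the pad-diluted final state.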
import os
from tpf import pkl_save, pkl_load

BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR, 'data_pkl/word.pkl')
X_train, y_train, X_test, y_test, words_set, word2idx, idx2word = pkl_load(file_path)

# Vocabulary size
dict_len = len(words_set)
# Sequence length
seq_len = 512

# Pad/truncate the training set
X_train1 = []
for x in X_train:
    temp = x + ["<PAD>"] * seq_len
    X_train1.append(temp[:seq_len])

# Pad/truncate the test set
X_test1 = []
for x in X_test:
    temp = x + ["<PAD>"] * seq_len
    X_test1.append(temp[:seq_len])

"""
Map tokens to vocabulary indices
"""
# Training set
X_train2 = []
for x in X_train1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_train2.append(temp)

# Test set
X_test2 = []
for x in X_test1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_test2.append(temp)

"""
Build the datasets
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F

class MyDataSet(Dataset):
    def __init__(self, X=X_train2, y=y_train):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train2, y=y_train)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)
test_dataset = MyDataSet(X=X_test2, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=256)

# Number of training epochs
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Monitoring: mean accuracy over a dataloader
def get_acc(dataloader=train_dataloader, model=None):
    accs = []
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X)
            y_pred = y_pred.argmax(dim=1)
            acc = (y_pred == y).float().mean().item()
            accs.append(acc)
    return np.array(accs).mean()

# Training loop (builds its own optimizer and loss)
def train(model, epochs=epochs, train_dataloader=train_dataloader, test_dataloader=test_dataloader):
    model.to(device=device)
    # Optimizer
    optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
    # Loss function
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}:")
        model.train()
        for X, y in train_dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            # Forward pass
            y_pred = model(X)
            # Clear old gradients
            optimizer.zero_grad()
            # Compute the loss
            loss = loss_fn(y_pred, y)
            # Backpropagate
            loss.backward()
            # Take one optimizer step
            optimizer.step()
        print(f"train_acc: {get_acc(dataloader=train_dataloader, model=model)}, test_acc: {get_acc(dataloader=test_dataloader, model=model)}")
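One small caveat in this harness: get_acc averages per-batch accuracies, so when the last batch is smaller (the dataset size is rarely a multiple of the batch size), it gets the same weight as a full batch and the reported number can drift slightly from the true accuracy. A variant that counts predictions directly, my sketch with the same conventions as above:

def get_acc_exact(dataloader, model):
    correct, total = 0, 0
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device=device), y.to(device=device)
            pred = model(X).argmax(dim=1)
            correct += (pred == y).sum().item()  # count hits, not batch means
            total += y.size(0)
    return correct / total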
The idea of an RNN: data unfolding and changing over time. The problem: it has no sense of which steps matter.

Optimization 1: add parameters that learn which steps are important. The original pooling was a plain sum/average, hn = torch.sum(input=out, dim=0); replace it with a learnable weight vector. Effective, but training fluctuates heavily (implemented below).

Optimization 2: more of the same in parallel. One RNN chain lacks expressive power, so add an identical second chain. No measurable gain; dropped.

Optimization 3: compute a self-attention branch and fuse it with the RNN output. Effective, and convergence also speeds up a little.
from T import words_set, word2idx, idx2word, dict_len, seq_len, device
from T import train

import torch
from torch import nn

class TextRNN1(nn.Module):
    """Sentiment analysis with an RNN plus a learnable pooling weight over time."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=512, hidden_size=512)
        # one learnable weight per time step, initialized to all ones
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        # weighted pooling over time: [B, hidden, seq_len] @ [seq_len, 1] -> [B, hidden, 1]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)
        # hn = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN1(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)

a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
# model(a)

# How the batched matmul broadcasts:
a = torch.randn(64, 3, 7)
w = torch.randn(7, 1)
(a @ w).shape  # torch.Size([64, 3, 1])

Training fluctuates heavily, possibly because the network is so simple:

Epoch 1: train_acc: 0.6476151315789473, test_acc: 0.6496448874473572
Epoch 2: train_acc: 0.7886513157894737, test_acc: 0.7704545497894287
Epoch 3: train_acc: 0.6231496710526315, test_acc: 0.61796875
Epoch 4: train_acc: 0.8011924342105263, test_acc: 0.7922585248947144
Epoch 5: train_acc: 0.7518503289473685, test_acc: 0.7339488625526428
Epoch 6: train_acc: 0.819078947368421, test_acc: 0.803125
Epoch 7: train_acc: 0.75390625, test_acc: 0.7557528495788575
Epoch 8: train_acc: 0.7999588815789473, test_acc: 0.8031960248947143
...
train_acc: 0.743421052631579, test_acc: 0.7326704621315002
Epoch 197: train_acc: 0.8887746710526315, test_acc: 0.8418323874473572
Epoch 198: train_acc: 0.7909128289473685, test_acc: 0.7748579621315003
Epoch 199: train_acc: 0.8871299342105263, test_acc: 0.8360795497894287
Epoch 200: train_acc: 0.8838404605263158, test_acc: 0.8465909123420715
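Since self.w is initialized to all ones, out @ self.w starts out exactly equal to the earlier torch.sum pooling; training then learns to up- or down-weight individual time steps. A quick check of that equivalence with dummy sizes:

import torch

out = torch.randn(4, 512, 16)   # [B, hidden, seq_len], as after the permutes above
w = torch.ones(16, 1)           # self.w at initialization
pooled = (out @ w).squeeze(2)   # learnable pooling at init
summed = out.sum(dim=2)         # plain sum over time
print(torch.allclose(pooled, summed, atol=1e-5))  # True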
import torch
from torch import nn
from T import words_set, word2idx, idx2word, dict_len, seq_len, device
from T import train

class TextRNN1(nn.Module):
    """RNN with learnable pooling, fused with a self-attention branch."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=512, hidden_size=512)
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        # Self-attention branch on the raw embeddings
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x, x1)     # [B, seq_len, seq_len] token-pair similarities
        x2 = torch.mean(x2, 2)    # [B, seq_len] one score per token
        a = torch.softmax(x2, 1)
        x2 = x2 * a               # [B, seq_len]

        # RNN branch with learnable pooling over time
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)           # [B, hidden]

        # Fusion; note this only type-checks because seq_len == hidden_size == 512
        out = out + x2
        out = self.fc(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN1(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)

a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
model(a)[:3]
tensor([[ -6.5141,   2.8470],
        [-15.3093,   3.4901],
        [ -2.6264,   1.9446]], device='cuda:0', grad_fn=<SliceBackward0>)

With attention fused in it looks far more sophisticated, and yet... in practice the results are no better than the plain learnable-weight version...

train(model)
Epoch 1: train_acc: 0.6227384868421053, test_acc: 0.6295454621315002
Epoch 2: train_acc: 0.6437088815789473, test_acc: 0.6204545497894287
Epoch 3: train_acc: 0.6494654605263158, test_acc: 0.6527698874473572
Epoch 4: train_acc: 0.6669407894736842, test_acc: 0.6718039870262146
Epoch 5: train_acc: 0.63671875, test_acc: 0.6379971623420715
Epoch 6: train_acc: 0.80078125, test_acc: 0.7698863744735718
Epoch 7: train_acc: 0.6348684210526315, test_acc: 0.6235795497894288
Epoch 8: train_acc: 0.7890625, test_acc: 0.7745028495788574
...
Epoch 193: train_acc: 0.8005756578947368, test_acc: 0.7829545497894287
Epoch 194: train_acc: 0.8118832236842105, test_acc: 0.7953835248947143
Epoch 195: train_acc: 0.8807565789473685, test_acc: 0.8306107997894288
Epoch 196: train_acc: 0.88671875, test_acc: 0.8410511374473572
Epoch 197: train_acc: 0.8614309210526315, test_acc: 0.8410511374473572
Epoch 198: train_acc: 0.8754111842105263, test_acc: 0.8321022748947143
Epoch 199: train_acc: 0.8817845394736842, test_acc: 0.8282670497894287
Epoch 200: train_acc: 0.889391447368421, test_acc: 0.8495738744735718
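A shape walk-through of the attention branch with small dummy sizes. It produces one scalar per token, not per feature, which is why the fusion above leans on the seq_len == hidden_size coincidence:

import torch

x = torch.randn(4, 6, 8)                  # [B, seq_len, embedding_dim]
scores = torch.bmm(x, x.transpose(1, 2))  # [B, 6, 6] dot product of every token pair
scores = scores.mean(dim=2)               # [B, 6]   averaged to one score per token
attn = torch.softmax(scores, dim=1)       # [B, 6]   normalized across the sequence
x2 = scores * attn                        # [B, 6]   the branch added to the RNN output
print(x2.shape)  # torch.Size([4, 6])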
from T import words_set, word2idx, idx2word, dict_len, seq_len, device
from T import train

import torch
from torch import nn

class TextRNN1(nn.Module):
    """RNN with learnable pooling; the RNN depth is now configurable."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, seq_len, rnn_num_layers=2):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.rnn = nn.RNN(input_size=512, hidden_size=512,
                          num_layers=rnn_num_layers, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)
        # hn = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN1(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)

Still unstable:

train(model)
Epoch 1: train_acc: 0.502672697368421, test_acc: 0.49595171213150024
Epoch 2: train_acc: 0.5092516447368421, test_acc: 0.5020596623420716
Epoch 3: train_acc: 0.6147203947368421, test_acc: 0.6397727251052856
Epoch 4: train_acc: 0.6537828947368421, test_acc: 0.6372159123420715
Epoch 5: train_acc: 0.7039473684210527, test_acc: 0.7002840995788574
Epoch 6: train_acc: 0.7206003289473685, test_acc: 0.7237926125526428
Epoch 7: train_acc: 0.6905838815789473, test_acc: 0.7029119372367859
Epoch 8: train_acc: 0.6079358552631579, test_acc: 0.5798295497894287
Epoch 9: train_acc: 0.653577302631579, test_acc: 0.66171875
...
Epoch 287: train_acc: 0.9292763157894737, test_acc: 0.8425426244735718
Epoch 288: train_acc: 0.9319490131578947, test_acc: 0.8455965995788575
Epoch 289: train_acc: 0.9229029605263158, test_acc: 0.8397727370262146
Epoch 290: train_acc: 0.9212582236842105, test_acc: 0.8308948874473572
Epoch 291: train_acc: 0.9222861842105263, test_acc: 0.8284801244735718
Epoch 292: train_acc: 0.9292763157894737, test_acc: 0.8455965995788575
Epoch 293: train_acc: 0.9222861842105263, test_acc: 0.8335227370262146
Epoch 294: train_acc: 0.9286595394736842, test_acc: 0.8350852370262146
Epoch 295: train_acc: 0.9356496710526315, test_acc: 0.8432528495788574
Epoch 296: train_acc: 0.9366776315789473, test_acc: 0.8401988744735718
Epoch 297: train_acc: 0.8947368421052632, test_acc: 0.819744324684143
Epoch 298: train_acc: 0.9189967105263158, test_acc: 0.8362926244735718
Epoch 299: train_acc: 0.9165296052631579, test_acc: 0.8337357997894287
Epoch 300: train_acc: 0.9187911184210527, test_acc: 0.83203125
class TextRNN1(nn.Module):
    """Two-layer RNN plus attention branch, fused with learnable scalar weights."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=512, hidden_size=512, num_layers=2, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.w1 = nn.Parameter(torch.ones(1, dtype=torch.float32), requires_grad=True)
        self.w2 = nn.Parameter(torch.ones(1, dtype=torch.float32), requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        # Self-attention branch
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x, x1)
        x2 = torch.mean(x2, 2)
        a = torch.softmax(x2, 1)
        x2 = x2 * a
        # RNN branch
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)
        # Fusion, now with a learnable scalar weight per branch
        out = out * self.w1 + x2 * self.w2
        out = self.fc(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN1(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)

Adding learnable parameters to the final fusion step (out = out*self.w1 + x2*self.w2) made convergence dramatically slower:

Epoch 259: train_acc: 0.8587582236842105, test_acc: 0.8132102370262146
Epoch 260: train_acc: 0.8497121710526315, test_acc: 0.8088778495788574
Epoch 261: train_acc: 0.8536184210526315, test_acc: 0.8107244372367859
Epoch 262: train_acc: 0.78515625, test_acc: 0.7372159123420715
Epoch 263: train_acc: 0.8540296052631579, test_acc: 0.8095880746841431
Epoch 264: train_acc: 0.841077302631579, test_acc: 0.8037642121315003
Epoch 265: train_acc: 0.6971628289473685, test_acc: 0.6632102251052856
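The two unconstrained scalars w1 and w2 can drift in scale against each other (the fc layer can absorb any common scale), which may contribute to the slow convergence. One common alternative, my own sketch rather than something from the experiments above, is a single bounded gate so the two branches always form a convex combination:

import torch
from torch import nn

# In __init__, instead of w1 and w2:
gate = nn.Parameter(torch.zeros(1))   # sigmoid(0) = 0.5: start with an equal mix

# In forward, instead of out*self.w1 + x2*self.w2:
out = torch.randn(4, 512)             # stand-in for the pooled RNN branch
x2 = torch.randn(4, 512)              # stand-in for the attention branch
g = torch.sigmoid(gate)
fused = g * out + (1 - g) * x2        # weights stay in (0, 1) and sum to 1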
import torch
from torch import nn
from torch.nn import functional as F
from T import words_set, word2idx, idx2word, dict_len, seq_len, device
from T import train

class TextRNN4(nn.Module):
    """Sentiment analysis with a GRU plus a learnable pooling weight over time."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, rnn_num_layers=2, seq_len=seq_len):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512,
                          num_layers=rnn_num_layers, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        # out = torch.sum(input=out, dim=0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN4(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)

a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
model(a)[:3]
tensor([[-0.0076, -1.2789],
        [-1.9879, -1.3223],
        [ 0.3328, -3.4323]], device='cuda:0', grad_fn=<SliceBackward0>)

train(model)
Epoch 1: train_acc: 0.5960115131578947, test_acc: 0.6265625
Epoch 2: train_acc: 0.5030838815789473, test_acc: 0.49595171213150024
Epoch 3: train_acc: 0.6938733552631579, test_acc: 0.6837357997894287
Epoch 4: train_acc: 0.6461759868421053, test_acc: 0.6132102251052857
Epoch 5: train_acc: 0.6975740131578947, test_acc: 0.6717329621315002
Epoch 6: train_acc: 0.7230674342105263, test_acc: 0.7051136374473572
Epoch 7: train_acc: 0.7364309210526315, test_acc: 0.7055397748947143
Epoch 8: train_acc: 0.7099095394736842, test_acc: 0.7137073874473572
Epoch 9: train_acc: 0.5779194078947368, test_acc: 0.5641335248947144
Epoch 10: train_acc: 0.7571957236842105, test_acc: 0.7381392121315002
Epoch 11: train_acc: 0.759046052631579, test_acc: 0.7424715995788574
Epoch 12: train_acc: 0.7259457236842105, test_acc: 0.6887784123420715
Epoch 13: train_acc: 0.7674753289473685, test_acc: 0.72890625
Epoch 14: train_acc: 0.6486430921052632, test_acc: 0.6557528495788574
Epoch 15: train_acc: 0.6946957236842105, test_acc: 0.6487926125526429
Epoch 16: train_acc: 0.752672697368421, test_acc: 0.7154119372367859
Epoch 17: train_acc: 0.7859786184210527, test_acc: 0.761150574684143
Epoch 18: train_acc: 0.5587993421052632, test_acc: 0.5475142121315002
Epoch 19: train_acc: 0.7826891447368421, test_acc: 0.7446732997894288
Epoch 20: train_acc: 0.5546875, test_acc: 0.5762784123420716
Epoch 21: train_acc: 0.7974917763157895, test_acc: 0.7553267121315003
Epoch 22: train_acc: 0.7549342105263158, test_acc: 0.7100142121315003
Epoch 23: train_acc: 0.6217105263157895, test_acc: 0.5901278495788574
Epoch 24: train_acc: 0.7255345394736842, test_acc: 0.7355823874473572
Epoch 25: train_acc: 0.7826891447368421, test_acc: 0.7743607997894287
Epoch 26: train_acc: 0.7039473684210527, test_acc: 0.704900574684143
Epoch 27: train_acc: 0.6934621710526315, test_acc: 0.6998579621315002
Epoch 28: train_acc: 0.7380756578947368, test_acc: 0.7480113625526428
Epoch 29: train_acc: 0.7534950657894737, test_acc: 0.7097301125526428
Epoch 30: train_acc: 0.7039473684210527, test_acc: 0.6578125
Epoch 31: train_acc: 0.77734375, test_acc: 0.7390625
Epoch 32: train_acc: 0.7711759868421053, test_acc: 0.7329545497894288
Epoch 33: train_acc: 0.6461759868421053, test_acc: 0.6142755746841431
Epoch 34: train_acc: 0.8032483552631579, test_acc: 0.78671875
Epoch 35: train_acc: 0.7884457236842105, test_acc: 0.7833096623420716
Epoch 36: train_acc: 0.7569901315789473, test_acc: 0.7615056872367859
Epoch 37: train_acc: 0.7419819078947368, test_acc: 0.6953835248947143
Epoch 38: train_acc: 0.7925575657894737, test_acc: 0.7453125
Epoch 39: train_acc: 0.8100328947368421, test_acc: 0.7690340995788574
Epoch 40: train_acc: 0.7060032894736842, test_acc: 0.6689630746841431
Epoch 41: train_acc: 0.7911184210526315, test_acc: 0.7840198874473572
Epoch 42: train_acc: 0.7991365131578947, test_acc: 0.79375
Epoch 43: train_acc: 0.8256578947368421, test_acc: 0.8014204621315002
Epoch 44: train_acc: 0.7129934210526315, test_acc: 0.7213068246841431
Epoch 45: train_acc: 0.5993009868421053, test_acc: 0.5727272748947143
Epoch 46: train_acc: 0.8390213815789473, test_acc: 0.8026278495788575
Epoch 47: train_acc: 0.7775493421052632, test_acc: 0.7833096623420716
Epoch 48: train_acc: 0.6036184210526315, test_acc: 0.6108664751052857
Epoch 49: train_acc: 0.7481496710526315, test_acc: 0.7030539870262146
Epoch 50: train_acc: 0.8009868421052632, test_acc: 0.7875710248947143
Epoch 51: train_acc: 0.7185444078947368, test_acc: 0.728338074684143
Epoch 52: train_acc: 0.7193667763157895, test_acc: 0.6794034123420716
Epoch 53: train_acc: 0.6944901315789473, test_acc: 0.6545454621315002
Epoch 54: train_acc: 0.8402549342105263, test_acc: 0.7958806872367858
Epoch 55: train_acc: 0.8159950657894737, test_acc: 0.8026988744735718
Epoch 56: train_acc: 0.8342927631578947, test_acc: 0.8084517121315002
Epoch 57: train_acc: 0.8439555921052632, test_acc: 0.8029119372367859
Epoch 58: train_acc: 0.8114720394736842, test_acc: 0.7667613744735717
Epoch 59: train_acc: 0.8441611842105263, test_acc: 0.8099431872367859
Epoch 60: train_acc: 0.7921463815789473, test_acc: 0.7349431872367859
Epoch 61: train_acc: 0.8266858552631579, test_acc: 0.8092329621315002
Epoch 62: train_acc: 0.7335526315789473, test_acc: 0.74140625
Epoch 63: train_acc: 0.8375822368421053, test_acc: 0.7951704621315002
Epoch 64: train_acc: 0.8451891447368421, test_acc: 0.8024857997894287
Epoch 65: train_acc: 0.8036595394736842, test_acc: 0.8034090995788574
Epoch 66: train_acc: 0.7979029605263158, test_acc: 0.7561789870262146
Epoch 67: train_acc: 0.8223684210526315, test_acc: 0.7737926244735718
Epoch 68: train_acc: 0.852796052631579, test_acc: 0.8145596623420716
Epoch 69: train_acc: 0.8205180921052632, test_acc: 0.7683238744735718
Epoch 70: train_acc: 0.8227796052631579, test_acc: 0.8089488744735718
Epoch 71: train_acc: 0.7944078947368421, test_acc: 0.7480823874473572
Epoch 72: train_acc: 0.7909128289473685, test_acc: 0.7422585248947143
Epoch 73: train_acc: 0.6566611842105263, test_acc: 0.6286931872367859
Epoch 74: train_acc: 0.7481496710526315, test_acc: 0.7042613625526428
Epoch 75: train_acc: 0.7701480263157895, test_acc: 0.7671164870262146
Epoch 76: train_acc: 0.7571957236842105, test_acc: 0.7639204621315002
Epoch 77: train_acc: 0.86328125, test_acc: 0.8176846623420715
Epoch 78: train_acc: 0.8077713815789473, test_acc: 0.7589488744735717
...
train_acc: 0.9097450657894737, test_acc: 0.8473011374473571
Epoch 286: train_acc: 0.7378700657894737, test_acc: 0.7109375
Epoch 287: train_acc: 0.8969983552631579, test_acc: 0.8231534123420715
Epoch 288: train_acc: 0.9286595394736842, test_acc: 0.8594460248947143
Epoch 289: train_acc: 0.9344161184210527, test_acc: 0.8642045497894287
Epoch 290: train_acc: 0.9189967105263158, test_acc: 0.854900574684143
Epoch 291: train_acc: 0.6891447368421053, test_acc: 0.64375
Epoch 292: train_acc: 0.9263980263157895, test_acc: 0.8576704621315002
Epoch 293: train_acc: 0.8460115131578947, test_acc: 0.8085227370262146
Epoch 294: train_acc: 0.9385279605263158, test_acc: 0.8755681872367859
Epoch 295: train_acc: 0.9409950657894737, test_acc: 0.8747869372367859
Epoch 296: train_acc: 0.9379111842105263, test_acc: 0.8736505746841431
Epoch 297: train_acc: 0.9432565789473685, test_acc: 0.8747869372367859
Epoch 298: train_acc: 0.9389391447368421, test_acc: 0.8700994372367858
Epoch 299: train_acc: 0.9424342105263158, test_acc: 0.8735795497894288
Epoch 300: train_acc: 0.94140625, test_acc: 0.8774857997894288
import torch
from torch import nn
from torch.nn import functional as F
from T import words_set, word2idx, idx2word, dict_len, seq_len, device
from T import train

class TextRNN4(nn.Module):
    """GRU with learnable pooling, fused with the self-attention branch."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx, rnn_num_layers=2, seq_len=seq_len):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512,
                          num_layers=rnn_num_layers, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32), requires_grad=True)
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        x = self.embed(x)
        # Self-attention branch
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x, x1)
        x2 = torch.mean(x2, 2)
        a = torch.softmax(x2, 1)
        x2 = x2 * a
        # GRU branch
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
        # hn = torch.squeeze(input=hn, dim=0)
        # out = torch.sum(input=out, dim=0)
        out = torch.permute(input=out, dims=(1, 0, 2))  # [B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1))  # [B, hidden, seq_len]
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)
        # Fusion of the two branches
        out = out + x2
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN4(num_embeddings=dict_len, embedding_dim=512,
                 padding_idx=word2idx["<PAD>"], seq_len=seq_len)
model.to(device=device)

# Optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
# Loss function
loss_fn = nn.CrossEntropyLoss()

a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
model(a)[:3]
tensor([[ 3.0386,  3.3657],
        [ 7.9816,  0.3211],
        [ 4.6253, -1.9200]], device='cuda:0', grad_fn=<SliceBackward0>)

train(model)
Epoch 1: train_acc: 0.6212993421052632, test_acc: 0.6240767121315003
Epoch 2: train_acc: 0.63671875, test_acc: 0.6154119372367859
Epoch 3: train_acc: 0.5916940789473685, test_acc: 0.5946022748947144
Epoch 4: train_acc: 0.5333059210526315, test_acc: 0.5214488625526428
Epoch 5: train_acc: 0.6903782894736842, test_acc: 0.6786931872367858
...
Epoch 294: train_acc: 0.9333881578947368, test_acc: 0.8639914870262146
Epoch 295: train_acc: 0.9399671052631579, test_acc: 0.8640625
Epoch 296: train_acc: 0.9422286184210527, test_acc: 0.8389914870262146
Epoch 297: train_acc: 0.9346217105263158, test_acc: 0.8578835248947143
Epoch 298: train_acc: 0.8784950657894737, test_acc: 0.8085227370262146
Epoch 299: train_acc: 0.9432565789473685, test_acc: 0.8585227370262146
Epoch 300: train_acc: 0.9346217105263158, test_acc: 0.8484375
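To close the loop, a hedged inference sketch for the trained model. The function name predict is mine; it reuses the pipeline's <PAD>/<UNK>/seq_len conventions, and the 0/1 output follows whatever label encoding y_train uses:

def predict(tokens, model, word2idx, seq_len=seq_len):
    """tokens: a list of words, segmented the same way as the training data."""
    padded = (tokens + ["<PAD>"] * seq_len)[:seq_len]
    idxs = [word2idx.get(w, word2idx["<UNK>"]) for w in padded]
    x = torch.tensor([idxs]).long().to(device=device)  # batch of one
    model.eval()
    with torch.no_grad():
        logits = model(x)
    return logits.argmax(dim=1).item()  # 0 or 1, per the dataset's label encoding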
|
|
|