Understanding LSTM

 
Understanding LSTM Networks
http://colah.github.io/posts/2015-08-Understanding-LSTMs/

 

    

 
The idea behind RNNs is appealing:
- a thing has three phases: past, present, future; an RNN is pinned to the present

The problem is that too much happens in any given present, and the RNN cannot judge an event well from a global (past, present, future) perspective:
- which moments matter to the whole
- where the key turning points are
    

 
Attention, which came later, solves this
- it picks out the important information

Can we combine RNN outputs with attention? Would that give better results?

 
class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
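
A quick shape check on the bidirectional state (a sketch, using an arbitrary length of 86 and a batch of 4): hn stacks the two directions along dim 0, so summing over dim 0 merges the forward and backward final states into the [b, 512] feature the classifier sees.

import torch
from torch import nn

rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
x = torch.randn(86, 4, 256)        # [seq_len, b, input_size]
h0 = torch.zeros(2, 4, 512)        # [num_directions, b, hidden_size]
out, hn = rnn(x, h0)
print(out.shape)                   # torch.Size([86, 4, 1024]): both directions concatenated
print(hn.shape)                    # torch.Size([2, 4, 512]): one final state per direction
print(hn.sum(dim=0).shape)         # torch.Size([4, 512]): what self.fc receives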
    

 

class TextRNN2(nn.Module):
    """
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN2, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=256, hidden_size=512)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        # out: [seq_len, b, 512]
        out, hn = self.rnn(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        out = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out
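
TextRNN1 classifies from the final state hn, while TextRNN2 sums the per-step outputs over time. A sketch of the difference (for a unidirectional single-layer RNN the last row of out coincides with hn):

import torch
from torch import nn

rnn = nn.RNN(input_size=256, hidden_size=512)
x = torch.randn(86, 4, 256)             # [seq_len, b, input_size]
out, hn = rnn(x)                        # out: [86, 4, 512], hn: [1, 4, 512]
print(torch.allclose(out[-1], hn[0]))   # True: the last step equals the final state
pooled = out.sum(dim=0)                 # TextRNN2's choice: sum over all time steps
print(pooled.shape)                     # torch.Size([4, 512])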

class TextRNN3(nn.Module):
    """
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN3, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.lstm = nn.LSTM(input_size=256, hidden_size=512, 
                            num_layers=2,
                            bidirectional=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        
        # [num_layers * num_directions, b, 512] = [4, b, 512]
        h0 = torch.zeros(4, x.size(1), 512).float().to(device=device)
        c0 = torch.zeros(4, x.size(1), 512).float().to(device=device)
       
        out, (hn, cn) = self.lstm(x, (h0, c0))
        
#         out = torch.sum(input=out, dim=0)
        
#         cn = torch.squeeze(input=cn, dim=0)
        cn = torch.sum(input=cn, dim=0)

#         hn = hn[-1]
        
        out = self.fc(cn)
        
        return out
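
With num_layers=2 and bidirectional=True, the LSTM stacks layers times directions along dim 0 of its state tensors, which is why the zero states above are [4, b, 512]. A quick check:

import torch
from torch import nn

lstm = nn.LSTM(input_size=256, hidden_size=512, num_layers=2, bidirectional=True)
x = torch.randn(86, 4, 256)
out, (hn, cn) = lstm(x)       # default zero initial states
print(out.shape)              # torch.Size([86, 4, 1024]): top layer, both directions
print(hn.shape)               # torch.Size([4, 4, 512]): num_layers * num_directions states
print(cn.sum(dim=0).shape)    # torch.Size([4, 512]): TextRNN3 folds all four cell states into one feature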
  
class TextRNN4(nn.Module):
    """
        基于 GRU 结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN4, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.gru = nn.GRU(input_size=256, hidden_size=512, bidirectional=True)
        
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        
        self.fc2 = nn.Linear(in_features=128, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc1(hn)
        out = F.relu(out)
        out = self.fc2(out)
        return out
    

 


 


RNN Training

 
import os 
from tpf import pkl_save,pkl_load 
BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR,'data_pkl/word.pkl')
X_train,y_train,X_test,y_test,words_set,word2idx,idx2word = pkl_load(file_path)

# vocabulary size
dict_len = len(words_set)

# sequence length
seq_len = 512

# training set
X_train1 = []
for x in X_train:
    temp = x + ["<PAD>"] * seq_len
    X_train1.append(temp[:seq_len])

# test set
X_test1 = []
for x in X_test:
    temp = x + ["<PAD>"] * seq_len
    X_test1.append(temp[:seq_len])
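
# quick check of the pad-then-truncate idiom above (toy, hypothetical input):
#   (["好", "酒店"] + ["<PAD>"] * 5)[:5]  ->  ['好', '酒店', '<PAD>', '<PAD>', '<PAD>']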


"""
    索引向量化
"""
# 训练集向量化

X_train2 = []
for x in X_train1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_train2.append(temp)
    

# vectorize the test set

X_test2 = []
for x in X_test1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_test2.append(temp)


"""
    构建数据集
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F

class MyDataSet(Dataset):
    
    def __init__(self, X=X_train2, y=y_train):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train2, y=y_train)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)

test_dataset = MyDataSet(X=X_test2, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=256)
train_dataset[0][0][:7]
    

 
tensor([ 5321,   706, 20311, 14111,  1036,   823, 14111])
    

 
class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()
    
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
# model(a)

 
# number of training epochs
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# accuracy monitor
def get_acc(dataloader=train_dataloader, model=model):
    accs = []
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X,y in dataloader:
            X=X.to(device=device)
            y=y.to(device=device)
            y_pred = model(X)
            y_pred = y_pred.argmax(dim=1)
            acc = (y_pred == y).float().mean().item()
            accs.append(acc)
    return np.array(accs).mean()

# training loop
def train(model=model, 
            optimizer=optimizer, 
            loss_fn=loss_fn, 
            epochs=epochs, 
            train_dataloader=train_dataloader,
            test_dataloader=test_dataloader):
    model.to(device=device)
    for epoch in range(1, epochs+1):
        print(f"正在进行第 {epoch} 轮训练:")

        model.train()
        for X,y in train_dataloader:
            X=X.to(device=device)
            y=y.to(device=device)
            # forward pass
            y_pred = model(X)

            # zero gradients
            optimizer.zero_grad()

            # compute loss
            loss = loss_fn(y_pred, y)

            # backpropagation
            loss.backward()

            # optimizer step
            optimizer.step()

        print(f"train_acc: {get_acc(dataloader=train_dataloader)}, test_acc: {get_acc(dataloader=test_dataloader)}")
        
train()
    

Even at epoch 200 the accuracy is only about 76%, and the gains have slowed to a crawl; the model's expressive power is limited.

 
Epoch 194:
train_acc: 0.8244243421052632, test_acc: 0.7652698874473571
Epoch 195:
train_acc: 0.8225740131578947, test_acc: 0.7633522748947144
Epoch 196:
train_acc: 0.8270970394736842, test_acc: 0.7621448874473572
Epoch 197:
train_acc: 0.8283305921052632, test_acc: 0.7641335248947143
Epoch 198:
train_acc: 0.8279194078947368, test_acc: 0.7641335248947143
Epoch 199:
train_acc: 0.8279194078947368, test_acc: 0.7644886374473572
Epoch 200:
train_acc: 0.8256578947368421, test_acc: 0.7644886374473572

    

 
from datasets import train_dataloader
from datasets import test_dataloader
from datasets import words_set,word2idx,idx2word
from T import train
    

 
import torch
from torch import nn 

class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                    embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=256, hidden_size=512, bidirectional=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"


 
# vocabulary size
dict_len = len(words_set)

model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()
    
train(model)
 
from T import words_set,word2idx,idx2word,dict_len,seq_len
from T import train

 
import torch
from torch import nn

device = "cuda:0" if torch.cuda.is_available() else "cpu"

class SingleRNNDefine(nn.Module):
    def __init__(self,input_size,hidden_size):
        super().__init__()
        # [batch_size,embedding_dim]@[embedding_dim,hidden_size] = [batch_size,hidden_size]
        self.cell_linear_x = nn.Linear(in_features=input_size,  out_features=hidden_size)
        self.cell_linear_h = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self,x,h0):
        seq_len, batch_size, embedding = x.shape
        output = []
        ht = h0[0]  # [batch_size, hidden_size]
        for t in range(seq_len):
            # print(f"x[{t}].shape={x[t].shape}")  # x[86].shape=torch.Size([32, 256])
            # [batch_size,embedding] -- [batch_size,hidden_size]
            # at each time step we can ignore the seq_len dimension: one word per step
            each_word = self.cell_linear_x(x[t])
            # print(f"t={t},each_word.shape={each_word.shape}")
            # print(f"t={t},ht.shape={ht.shape}")

            ht = self.cell_linear_h(ht)

            ht = torch.tanh(each_word + ht)
            # print(ht.shape)  # torch.Size([32, 512])

            output.append(ht.tolist())

        hn = torch.unsqueeze(input=ht,dim=0)
        # print(hn.shape)  # torch.Size([1, 32, 512])
        output = torch.Tensor(output)
        return output,hn
    

    

 
class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = SingleRNNDefine(input_size=256, hidden_size=512)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        # [b, seq_len] -- [b, seq_len, 256]
        x = self.embed(x)
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        # [b, seq_len, 256] -- [seq_len, b, 256]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
    
model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()

The code runs

 
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
# model(a)

But training is not only slow, the outputs never change either

 
train(model)

Epoch 1:
train_acc: 0.4967105263157895, test_acc: 0.4947443246841431
Epoch 2:
train_acc: 0.4995888157894737, test_acc: 0.4947443246841431
Epoch 3:
train_acc: 0.49773848684210525, test_acc: 0.4947443246841431
Epoch 4:
train_acc: 0.4985608552631579, test_acc: 0.4947443246841431
Epoch 5:
train_acc: 0.4981496710526316, test_acc: 0.4947443246841431
Epoch 6:
train_acc: 0.4985608552631579, test_acc: 0.4947443246841431
Epoch 7:
train_acc: 0.4981496710526316, test_acc: 0.4947443246841431
Epoch 8:
train_acc: 0.4979440789473684, test_acc: 0.4947443246841431
Epoch 9:
train_acc: 0.4975328947368421, test_acc: 0.4947443246841431
Epoch 10:
train_acc: 0.4967105263157895, test_acc: 0.4947443246841431
Epoch 11:
train_acc: 0.49897203947368424, test_acc: 0.4947443246841431
Epoch 12:
train_acc: 0.49712171052631576, test_acc: 0.4947443246841431
Epoch 13:
train_acc: 0.49835526315789475, test_acc: 0.4947443246841431
Epoch 14:
train_acc: 0.4995888157894737, test_acc: 0.4947443246841431
Epoch 15:
train_acc: 0.4987664473684211, test_acc: 0.4947443246841431

 
I hit the same situation last time when using BN in a 1-D convolution ...
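
A likely culprit in SingleRNNDefine: output.append(ht.tolist()) forces a GPU-to-CPU sync and a Python-list conversion at every one of the 512 steps (hence the slowness), and torch.Tensor(output) rebuilds the outputs detached from autograd. The hn path is still differentiable, but the gradient reaching the early steps of a 512-step tanh chain all but vanishes, so with SGD at lr=1e-3 the predictions barely move. A minimal sketch of the same cell with the graph kept intact, assuming the interface above:

import torch
from torch import nn

class SingleRNNDefine(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.cell_linear_x = nn.Linear(in_features=input_size,  out_features=hidden_size)
        self.cell_linear_h = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self, x, h0):
        # x: [seq_len, batch_size, input_size], h0: [*, batch_size, hidden_size]
        seq_len, batch_size, _ = x.shape
        outputs = []
        ht = h0[0]
        for t in range(seq_len):
            ht = torch.tanh(self.cell_linear_x(x[t]) + self.cell_linear_h(ht))
            outputs.append(ht)                 # keep the tensor: no .tolist()
        hn = torch.unsqueeze(ht, dim=0)        # [1, batch_size, hidden_size]
        output = torch.stack(outputs, dim=0)   # [seq_len, batch_size, hidden_size], on-device and differentiable
        return output, hn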

 


 
import os 
from tpf import pkl_save,pkl_load 
BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR,'data_pkl/word.pkl')
X_train,y_train,X_test,y_test,words_set,word2idx,idx2word = pkl_load(file_path)

# vocabulary size
dict_len = len(words_set)

# sequence length
seq_len = 512

# training set
X_train1 = []
for x in X_train:
    temp = x + ["<PAD>"] * seq_len
    X_train1.append(temp[:seq_len])

# test set
X_test1 = []
for x in X_test:
    temp = x + ["<PAD>"] * seq_len
    X_test1.append(temp[:seq_len])


"""
    索引向量化
"""
# 训练集向量化

X_train2 = []
for x in X_train1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_train2.append(temp)
    

# vectorize the test set

X_test2 = []
for x in X_test1:
    temp = []
    for word in x:
        idx = word2idx[word] if word in word2idx else word2idx["<UNK>"]
        temp.append(idx)
    X_test2.append(temp)


"""
    构建数据集
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F

class MyDataSet(Dataset):
    
    def __init__(self, X=X_train2, y=y_train):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train2, y=y_train)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)

test_dataset = MyDataSet(X=X_test2, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=256)


# number of training epochs
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"


# accuracy monitor
def get_acc(dataloader=train_dataloader, model=None):
    accs = []
    model.to(device=device)
    model.eval()
    with torch.no_grad():
        for X,y in dataloader:
            X=X.to(device=device)
            y=y.to(device=device)
            y_pred = model(X)
            y_pred = y_pred.argmax(dim=1)
            acc = (y_pred == y).float().mean().item()
            accs.append(acc)
    return np.array(accs).mean()

# training loop
def train(model, 
            epochs=epochs, 
            train_dataloader=train_dataloader,
            test_dataloader=test_dataloader):
    model.to(device=device)
    
    # optimizer
    optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

    # loss function
    loss_fn = nn.CrossEntropyLoss()


    for epoch in range(1, epochs+1):
        print(f"正在进行第 {epoch} 轮训练:")

        model.train()
        for X,y in train_dataloader:
            X=X.to(device=device)
            y=y.to(device=device)
            # forward pass
            y_pred = model(X)

            # zero gradients
            optimizer.zero_grad()

            # compute loss
            loss = loss_fn(y_pred, y)

            # backpropagation
            loss.backward()

            # optimizer step
            optimizer.step()

        print(f"train_acc: {get_acc(dataloader=train_dataloader,model=model)}, test_acc: {get_acc(dataloader=test_dataloader,model=model)}")
        
Optimized RNN

 
The RNN idea: data unfolding/changing over time.

But the problem is that there is no sense of which parts matter.


    

Optimization 1: parameters that learn which steps matter

 
hn = torch.sum(input=out, dim=0)

The original approach sums/averages the steps directly.

Now replace that with a learnable parameter.

The scheme works, but training fluctuates a lot; a minimal sketch of the idea follows.
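
The pooling idea in isolation (a sketch; out is the RNN output in the [seq_len, b, hidden] layout used by the models below):

import torch
from torch import nn

seq_len, b, hidden = 512, 4, 512
out = torch.randn(seq_len, b, hidden)

# learnable per-time-step weights replace the fixed sum over dim 0
w = nn.Parameter(torch.ones(seq_len, 1))
pooled = (out.permute(1, 2, 0) @ w).squeeze(dim=2)         # [b, hidden, seq_len] @ [seq_len, 1] -> [b, hidden]

# initialised to all ones, this starts out exactly as the old torch.sum(out, dim=0)
print(torch.allclose(pooled, out.sum(dim=0), atol=1e-3))   # True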
    

Optimization 2: the same method, with added parallelism

 
A single RNN chain has limited expressive power, so add a second identical chain.

It brought no measurable gain.

Dropped.
    

Optimization 3: compute a self-attention term and fuse it with the RNN output

 
Effective, and convergence also sped up a little; a sketch of the attention term follows.
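
The fused term in isolation (a sketch of the score pooling used below; x is the embedded batch). Note that the result lives on the seq_len axis, not the hidden axis, so adding it to the pooled RNN feature only type-checks because seq_len and hidden_size are both 512 in these experiments:

import torch

b, seq_len, emb = 4, 512, 512
x = torch.randn(b, seq_len, emb)            # embedded tokens

scores = torch.bmm(x, x.permute(0, 2, 1))   # [b, seq_len, seq_len] token-token similarities
scores = scores.mean(dim=2)                 # [b, seq_len] mean similarity per position
a = torch.softmax(scores, dim=1)            # attention weights over positions
x2 = scores * a                             # [b, seq_len] re-weighted scores to fuse with the RNN feature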
    

 

    

 
import torch
from torch import nn
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train

 

class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=512, hidden_size=512)
        
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
 
        out = out@self.w
        # print(1,out.shape)
        out = torch.squeeze(input=out, dim=2)
        # print(2,out.shape)
        # hn = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()


 
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
# model(a)

 
a = torch.randn(64,3,7)
w = torch.randn(7,1)
(a@w).shape  #torch.Size([64, 3, 1])

Possibly because the network is so simple, training fluctuates quite a lot.

 
Epoch 1:
train_acc: 0.6476151315789473, test_acc: 0.6496448874473572
Epoch 2:
train_acc: 0.7886513157894737, test_acc: 0.7704545497894287
Epoch 3:
train_acc: 0.6231496710526315, test_acc: 0.61796875
Epoch 4:
train_acc: 0.8011924342105263, test_acc: 0.7922585248947144
Epoch 5:
train_acc: 0.7518503289473685, test_acc: 0.7339488625526428
Epoch 6:
train_acc: 0.819078947368421, test_acc: 0.803125
Epoch 7:
train_acc: 0.75390625, test_acc: 0.7557528495788575
Epoch 8:
train_acc: 0.7999588815789473, test_acc: 0.8031960248947143

...
...
...

train_acc: 0.743421052631579, test_acc: 0.7326704621315002
Epoch 197:
train_acc: 0.8887746710526315, test_acc: 0.8418323874473572
Epoch 198:
train_acc: 0.7909128289473685, test_acc: 0.7748579621315003
Epoch 199:
train_acc: 0.8871299342105263, test_acc: 0.8360795497894287
Epoch 200:
train_acc: 0.8838404605263158, test_acc: 0.8465909123420715



 
import torch
from torch import nn 
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train


 

class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=512, hidden_size=512)
        
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        
        # token-token similarity matrix: [B, seq_len, seq_len]
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x,x1)
        # mean similarity per position: [B, seq_len]
        x2 = torch.mean(x2,2)
        a = torch.softmax(x2,1)
        # print('a:',a.shape)
        # re-weight the position scores with their own softmax: [B, seq_len]
        x2 = x2*a

        
        h0 = torch.zeros(1, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
 
        out = out@self.w
        # print(1,out.shape)
        out = torch.squeeze(input=out, dim=2)
        # print(2,out.shape)
        # hn = torch.sum(input=out, dim=0)
        # out is [B, hidden] while x2 is [B, seq_len]; the addition only works
        # because hidden_size and seq_len are both 512 here
        out = out+x2
        # print(1,out.shape)
        out = self.fc(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()


 
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
model(a)[:3]

tensor([[ -6.5141,   2.8470],
        [-15.3093,   3.4901],
        [ -2.6264,   1.9446]], device='cuda:0', grad_fn=<SliceBackward0>)

Fusing in attention looks much fancier, and yet... in practice it performs the same as the plain learned linear weights...

 
train(model)

Epoch 1:
train_acc: 0.6227384868421053, test_acc: 0.6295454621315002
Epoch 2:
train_acc: 0.6437088815789473, test_acc: 0.6204545497894287
Epoch 3:
train_acc: 0.6494654605263158, test_acc: 0.6527698874473572
Epoch 4:
train_acc: 0.6669407894736842, test_acc: 0.6718039870262146
Epoch 5:
train_acc: 0.63671875, test_acc: 0.6379971623420715
Epoch 6:
train_acc: 0.80078125, test_acc: 0.7698863744735718
Epoch 7:
train_acc: 0.6348684210526315, test_acc: 0.6235795497894288
Epoch 8:
train_acc: 0.7890625, test_acc: 0.7745028495788574
...
...
...
Epoch 193:
train_acc: 0.8005756578947368, test_acc: 0.7829545497894287
Epoch 194:
train_acc: 0.8118832236842105, test_acc: 0.7953835248947143
Epoch 195:
train_acc: 0.8807565789473685, test_acc: 0.8306107997894288
Epoch 196:
train_acc: 0.88671875, test_acc: 0.8410511374473572
Epoch 197:
train_acc: 0.8614309210526315, test_acc: 0.8410511374473572
Epoch 198:
train_acc: 0.8754111842105263, test_acc: 0.8321022748947143
Epoch 199:
train_acc: 0.8817845394736842, test_acc: 0.8282670497894287
Epoch 200:
train_acc: 0.889391447368421, test_acc: 0.8495738744735718

 
import torch
from torch import nn
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train

 

class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len,rnn_num_layers=2):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.rnn = nn.RNN(input_size=512, hidden_size=512,num_layers=rnn_num_layers,bidirectional=False)
        
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
 
        out = out@self.w
        # print(1,out.shape)
        out = torch.squeeze(input=out, dim=2)
        # print(2,out.shape)
        # hn = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len,rnn_num_layers=2)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()
  

The instability is still there

 
train(model)

Epoch 1:
train_acc: 0.502672697368421, test_acc: 0.49595171213150024
Epoch 2:
train_acc: 0.5092516447368421, test_acc: 0.5020596623420716
Epoch 3:
train_acc: 0.6147203947368421, test_acc: 0.6397727251052856
Epoch 4:
train_acc: 0.6537828947368421, test_acc: 0.6372159123420715
Epoch 5:
train_acc: 0.7039473684210527, test_acc: 0.7002840995788574
Epoch 6:
train_acc: 0.7206003289473685, test_acc: 0.7237926125526428
Epoch 7:
train_acc: 0.6905838815789473, test_acc: 0.7029119372367859
Epoch 8:
train_acc: 0.6079358552631579, test_acc: 0.5798295497894287
Epoch 9:
train_acc: 0.653577302631579, test_acc: 0.66171875
...
...
...

Epoch 287:
train_acc: 0.9292763157894737, test_acc: 0.8425426244735718
Epoch 288:
train_acc: 0.9319490131578947, test_acc: 0.8455965995788575
Epoch 289:
train_acc: 0.9229029605263158, test_acc: 0.8397727370262146
Epoch 290:
train_acc: 0.9212582236842105, test_acc: 0.8308948874473572
Epoch 291:
train_acc: 0.9222861842105263, test_acc: 0.8284801244735718
Epoch 292:
train_acc: 0.9292763157894737, test_acc: 0.8455965995788575
Epoch 293:
train_acc: 0.9222861842105263, test_acc: 0.8335227370262146
Epoch 294:
train_acc: 0.9286595394736842, test_acc: 0.8350852370262146
Epoch 295:
train_acc: 0.9356496710526315, test_acc: 0.8432528495788574
Epoch 296:
train_acc: 0.9366776315789473, test_acc: 0.8401988744735718
Epoch 297:
train_acc: 0.8947368421052632, test_acc: 0.819744324684143
Epoch 298:
train_acc: 0.9189967105263158, test_acc: 0.8362926244735718
Epoch 299:
train_acc: 0.9165296052631579, test_acc: 0.8337357997894287
Epoch 300:
train_acc: 0.9187911184210527, test_acc: 0.83203125

class TextRNN1(nn.Module):
    """
        基于基础RNN结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        
        self.rnn = nn.RNN(input_size=512, hidden_size=512,num_layers=2,bidirectional=False)
        
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.w1 = nn.Parameter(torch.ones(1, dtype=torch.float32),requires_grad=True)
        self.w2 = nn.Parameter(torch.ones(1, dtype=torch.float32),requires_grad=True)
        
        
        self.fc = nn.Linear(in_features=512, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        
        # token-token similarity matrix: [B, seq_len, seq_len]
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x,x1)
        # mean similarity per position: [B, seq_len]
        x2 = torch.mean(x2,2)
        a = torch.softmax(x2,1)
        # print('a:',a.shape)
        # re-weight the position scores with their own softmax: [B, seq_len]
        x2 = x2*a

        
        h0 = torch.zeros(2, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
 
        out = out@self.w
        # print(1,out.shape)
        out = torch.squeeze(input=out, dim=2)
        # print(2,out.shape)
        # hn = torch.sum(input=out, dim=0)
        out = out*self.w1+x2*self.w2
        
        # print(1,out.shape)
        out = self.fc(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()


out = out*self.w1+x2*self.w2

 
Adding learnable parameters at the final fusion step made convergence extremely slow.

Epoch 259:
train_acc: 0.8587582236842105, test_acc: 0.8132102370262146
Epoch 260:
train_acc: 0.8497121710526315, test_acc: 0.8088778495788574
Epoch 261:
train_acc: 0.8536184210526315, test_acc: 0.8107244372367859
Epoch 262:
train_acc: 0.78515625, test_acc: 0.7372159123420715
Epoch 263:
train_acc: 0.8540296052631579, test_acc: 0.8095880746841431
Epoch 264:
train_acc: 0.841077302631579, test_acc: 0.8037642121315003
Epoch 265:
train_acc: 0.6971628289473685, test_acc: 0.6632102251052856


 


 


Optimized GRU

 
import torch
from torch import nn 
from torch.nn import functional as F
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train

class TextRNN4(nn.Module):
    """
        基于 GRU 结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,rnn_num_layers=2,seq_len=seq_len):
        super(TextRNN4, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                    embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512,num_layers=rnn_num_layers, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        
        self.fc2 = nn.Linear(in_features=128, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        # out = torch.sum(input=out, dim=0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
    
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN4(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()

    

 
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
model(a)[:3]
    

 
tensor([[-0.0076, -1.2789],
        [-1.9879, -1.3223],
        [ 0.3328, -3.4323]], device='cuda:0', grad_fn=<SliceBackward0>)


    

 
train(model)
    
Epoch 1:
train_acc: 0.5960115131578947, test_acc: 0.6265625
Epoch 2:
train_acc: 0.5030838815789473, test_acc: 0.49595171213150024
Epoch 3:
train_acc: 0.6938733552631579, test_acc: 0.6837357997894287
Epoch 4:
train_acc: 0.6461759868421053, test_acc: 0.6132102251052857
Epoch 5:
train_acc: 0.6975740131578947, test_acc: 0.6717329621315002
Epoch 6:
train_acc: 0.7230674342105263, test_acc: 0.7051136374473572
Epoch 7:
train_acc: 0.7364309210526315, test_acc: 0.7055397748947143
Epoch 8:
train_acc: 0.7099095394736842, test_acc: 0.7137073874473572
Epoch 9:
train_acc: 0.5779194078947368, test_acc: 0.5641335248947144
Epoch 10:
train_acc: 0.7571957236842105, test_acc: 0.7381392121315002
Epoch 11:
train_acc: 0.759046052631579, test_acc: 0.7424715995788574
Epoch 12:
train_acc: 0.7259457236842105, test_acc: 0.6887784123420715
Epoch 13:
train_acc: 0.7674753289473685, test_acc: 0.72890625
Epoch 14:
train_acc: 0.6486430921052632, test_acc: 0.6557528495788574
Epoch 15:
train_acc: 0.6946957236842105, test_acc: 0.6487926125526429
Epoch 16:
train_acc: 0.752672697368421, test_acc: 0.7154119372367859
Epoch 17:
train_acc: 0.7859786184210527, test_acc: 0.761150574684143
Epoch 18:
train_acc: 0.5587993421052632, test_acc: 0.5475142121315002
Epoch 19:
train_acc: 0.7826891447368421, test_acc: 0.7446732997894288
Epoch 20:
train_acc: 0.5546875, test_acc: 0.5762784123420716
Epoch 21:
train_acc: 0.7974917763157895, test_acc: 0.7553267121315003
Epoch 22:
train_acc: 0.7549342105263158, test_acc: 0.7100142121315003
Epoch 23:
train_acc: 0.6217105263157895, test_acc: 0.5901278495788574
Epoch 24:
train_acc: 0.7255345394736842, test_acc: 0.7355823874473572
Epoch 25:
train_acc: 0.7826891447368421, test_acc: 0.7743607997894287
Epoch 26:
train_acc: 0.7039473684210527, test_acc: 0.704900574684143
Epoch 27:
train_acc: 0.6934621710526315, test_acc: 0.6998579621315002
Epoch 28:
train_acc: 0.7380756578947368, test_acc: 0.7480113625526428
Epoch 29:
train_acc: 0.7534950657894737, test_acc: 0.7097301125526428
Epoch 30:
train_acc: 0.7039473684210527, test_acc: 0.6578125
Epoch 31:
train_acc: 0.77734375, test_acc: 0.7390625
Epoch 32:
train_acc: 0.7711759868421053, test_acc: 0.7329545497894288
Epoch 33:
train_acc: 0.6461759868421053, test_acc: 0.6142755746841431
Epoch 34:
train_acc: 0.8032483552631579, test_acc: 0.78671875
Epoch 35:
train_acc: 0.7884457236842105, test_acc: 0.7833096623420716
Epoch 36:
train_acc: 0.7569901315789473, test_acc: 0.7615056872367859
Epoch 37:
train_acc: 0.7419819078947368, test_acc: 0.6953835248947143
Epoch 38:
train_acc: 0.7925575657894737, test_acc: 0.7453125
Epoch 39:
train_acc: 0.8100328947368421, test_acc: 0.7690340995788574
Epoch 40:
train_acc: 0.7060032894736842, test_acc: 0.6689630746841431
Epoch 41:
train_acc: 0.7911184210526315, test_acc: 0.7840198874473572
Epoch 42:
train_acc: 0.7991365131578947, test_acc: 0.79375
Epoch 43:
train_acc: 0.8256578947368421, test_acc: 0.8014204621315002
Epoch 44:
train_acc: 0.7129934210526315, test_acc: 0.7213068246841431
Epoch 45:
train_acc: 0.5993009868421053, test_acc: 0.5727272748947143
Epoch 46:
train_acc: 0.8390213815789473, test_acc: 0.8026278495788575
Epoch 47:
train_acc: 0.7775493421052632, test_acc: 0.7833096623420716
Epoch 48:
train_acc: 0.6036184210526315, test_acc: 0.6108664751052857
Epoch 49:
train_acc: 0.7481496710526315, test_acc: 0.7030539870262146
Epoch 50:
train_acc: 0.8009868421052632, test_acc: 0.7875710248947143
Epoch 51:
train_acc: 0.7185444078947368, test_acc: 0.728338074684143
Epoch 52:
train_acc: 0.7193667763157895, test_acc: 0.6794034123420716
Epoch 53:
train_acc: 0.6944901315789473, test_acc: 0.6545454621315002
Epoch 54:
train_acc: 0.8402549342105263, test_acc: 0.7958806872367858
Epoch 55:
train_acc: 0.8159950657894737, test_acc: 0.8026988744735718
Epoch 56:
train_acc: 0.8342927631578947, test_acc: 0.8084517121315002
Epoch 57:
train_acc: 0.8439555921052632, test_acc: 0.8029119372367859
Epoch 58:
train_acc: 0.8114720394736842, test_acc: 0.7667613744735717
Epoch 59:
train_acc: 0.8441611842105263, test_acc: 0.8099431872367859
Epoch 60:
train_acc: 0.7921463815789473, test_acc: 0.7349431872367859
Epoch 61:
train_acc: 0.8266858552631579, test_acc: 0.8092329621315002
Epoch 62:
train_acc: 0.7335526315789473, test_acc: 0.74140625
Epoch 63:
train_acc: 0.8375822368421053, test_acc: 0.7951704621315002
Epoch 64:
train_acc: 0.8451891447368421, test_acc: 0.8024857997894287
Epoch 65:
train_acc: 0.8036595394736842, test_acc: 0.8034090995788574
Epoch 66:
train_acc: 0.7979029605263158, test_acc: 0.7561789870262146
Epoch 67:
train_acc: 0.8223684210526315, test_acc: 0.7737926244735718
Epoch 68:
train_acc: 0.852796052631579, test_acc: 0.8145596623420716
Epoch 69:
train_acc: 0.8205180921052632, test_acc: 0.7683238744735718
Epoch 70:
train_acc: 0.8227796052631579, test_acc: 0.8089488744735718
Epoch 71:
train_acc: 0.7944078947368421, test_acc: 0.7480823874473572
Epoch 72:
train_acc: 0.7909128289473685, test_acc: 0.7422585248947143
Epoch 73:
train_acc: 0.6566611842105263, test_acc: 0.6286931872367859
Epoch 74:
train_acc: 0.7481496710526315, test_acc: 0.7042613625526428
Epoch 75:
train_acc: 0.7701480263157895, test_acc: 0.7671164870262146
Epoch 76:
train_acc: 0.7571957236842105, test_acc: 0.7639204621315002
Epoch 77:
train_acc: 0.86328125, test_acc: 0.8176846623420715
Epoch 78:
train_acc: 0.8077713815789473, test_acc: 0.7589488744735717
...
...
...
train_acc: 0.9097450657894737, test_acc: 0.8473011374473571
Epoch 286:
train_acc: 0.7378700657894737, test_acc: 0.7109375
Epoch 287:
train_acc: 0.8969983552631579, test_acc: 0.8231534123420715
Epoch 288:
train_acc: 0.9286595394736842, test_acc: 0.8594460248947143
Epoch 289:
train_acc: 0.9344161184210527, test_acc: 0.8642045497894287
Epoch 290:
train_acc: 0.9189967105263158, test_acc: 0.854900574684143
Epoch 291:
train_acc: 0.6891447368421053, test_acc: 0.64375
Epoch 292:
train_acc: 0.9263980263157895, test_acc: 0.8576704621315002
Epoch 293:
train_acc: 0.8460115131578947, test_acc: 0.8085227370262146
Epoch 294:
train_acc: 0.9385279605263158, test_acc: 0.8755681872367859
Epoch 295:
train_acc: 0.9409950657894737, test_acc: 0.8747869372367859
Epoch 296:
train_acc: 0.9379111842105263, test_acc: 0.8736505746841431
Epoch 297:
train_acc: 0.9432565789473685, test_acc: 0.8747869372367859
Epoch 298:
train_acc: 0.9389391447368421, test_acc: 0.8700994372367858
Epoch 299:
train_acc: 0.9424342105263158, test_acc: 0.8735795497894288
Epoch 300:
train_acc: 0.94140625, test_acc: 0.8774857997894288


 

    

 
import torch
from torch import nn 
from torch.nn import functional as F
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
    

 

class TextRNN4(nn.Module):
    """
        基于 GRU 结构来进行情感分析
    """
    def __init__(self, num_embeddings, embedding_dim, padding_idx,rnn_num_layers=2,seq_len=seq_len):
        super(TextRNN4, self).__init__()
        
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                 embedding_dim=embedding_dim,
                                padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512,num_layers=rnn_num_layers, bidirectional=False)
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        
        self.fc2 = nn.Linear(in_features=128, out_features=2)
    
    def forward(self, x):
        x = self.embed(x)
        
        # token-token similarity matrix: [B, seq_len, seq_len]
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x,x1)
        # mean similarity per position: [B, seq_len]
        x2 = torch.mean(x2,2)
        a = torch.softmax(x2,1)
        # print('a:',a.shape)
        # re-weight the position scores with their own softmax: [B, seq_len]
        x2 = x2*a
        
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512).float().to(device=device)
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
#         hn = torch.squeeze(input=hn, dim=0)
        # out = torch.sum(input=out, dim=0)
        out = torch.permute(input=out, dims=(1, 0, 2)) #[B, seq_len, hidden]
        out = torch.permute(input=out, dims=(0, 2, 1)) #[B, hidden, seq_len]
 
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        
        # this addition only type-checks because seq_len == hidden_size == 512
        # (x2 is [B, seq_len] while out is [B, hidden])
        out = out + x2
        
        
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
    
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# device = "cuda:0" if torch.cuda.is_available() else "cpu"  
model = TextRNN4(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
model.to(device=device)

# optimizer
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# loss function
loss_fn = nn.CrossEntropyLoss()
    

 
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
model(a)[:3]
tensor([[ 3.0386,  3.3657],
        [ 7.9816,  0.3211],
        [ 4.6253, -1.9200]], device='cuda:0', grad_fn=<SliceBackward0>)
    

 
train(model)

Epoch 1:
train_acc: 0.6212993421052632, test_acc: 0.6240767121315003
Epoch 2:
train_acc: 0.63671875, test_acc: 0.6154119372367859
Epoch 3:
train_acc: 0.5916940789473685, test_acc: 0.5946022748947144
Epoch 4:
train_acc: 0.5333059210526315, test_acc: 0.5214488625526428
Epoch 5:
train_acc: 0.6903782894736842, test_acc: 0.6786931872367858
...
...
...
Epoch 294:
train_acc: 0.9333881578947368, test_acc: 0.8639914870262146
Epoch 295:
train_acc: 0.9399671052631579, test_acc: 0.8640625
Epoch 296:
train_acc: 0.9422286184210527, test_acc: 0.8389914870262146
Epoch 297:
train_acc: 0.9346217105263158, test_acc: 0.8578835248947143
Epoch 298:
train_acc: 0.8784950657894737, test_acc: 0.8085227370262146
Epoch 299:
train_acc: 0.9432565789473685, test_acc: 0.8585227370262146
Epoch 300:
train_acc: 0.9346217105263158, test_acc: 0.8484375

References