Introduction to the review index dataset
Binary review classification: positive review = 0, negative review = 1.
After vectorization, the texts become an index matrix; indices are encoded starting from 0, and the number of distinct indices equals the dictionary length.
A single text is a 1-D index vector, e.g. [3,2,88,94,...,1,33], paired with one label, e.g. 1.
Multiple texts form a batch, e.g. of shape [batch_size, seq_len], paired with a batch of labels of shape [batch_size].
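As a concrete illustration of this encoding, here is a minimal sketch with a made-up vocabulary and two toy reviews (none of this comes from the hotel dataset; index 0 is reserved for padding):

import torch

word2idx = {"<pad>": 0, "the": 1, "room": 2, "was": 3, "clean": 4, "dirty": 5}

# Two tokenized reviews; shorter texts would be padded with index 0 up to seq_len.
reviews = [["the", "room", "was", "clean"],
           ["the", "room", "was", "dirty"]]
labels = [0, 1]  # 0 = positive, 1 = negative
seq_len = 4

X = torch.tensor([[word2idx[w] for w in r] for r in reviews])  # index matrix [batch_size, seq_len]
y = torch.tensor(labels)                                       # labels [batch_size]
assert X.shape == (len(reviews), seq_len)
print(X.shape, y.shape)  # torch.Size([2, 4]) torch.Size([2])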
Loading the index dataset
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from ai.datasets import load_hotel

X_train, y_train, X_test, y_test = load_hotel(return_dict=False, return_Xy=True)

class MyDataSet(Dataset):
    """Build the dataset"""
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test, y=y_test)

Output:
X_train: (4800, 85)
y_train: (4800,)
X_test: (1200, 85)
y_test: (1200,)

In MyDataSet, the data x is converted to a long tensor, and so is the label.
In PyTorch, a scalar is also a tensor.
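A quick check of what __getitem__ returns (a sketch, assuming the dataset built above): the scalar label comes back as a 0-dimensional long tensor.

x0, y0 = train_dataset[0]
print(x0.dtype, x0.shape)            # torch.int64 torch.Size([85])
print(y0.dtype, y0.shape, y0.ndim)   # torch.int64 torch.Size([]) 0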
Batch loading
# Take batches from the dataset
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    print(type(X))                           # <class 'torch.Tensor'>
    break

Use shuffle=True for the training set and shuffle=False for the test set (see the sketch below).
Within what the hardware allows, make batch_size as large as is practical.
Batches taken from the loader are of type Tensor.
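For completeness, a test loader following the convention just stated (a sketch, reusing test_dataset from above):

test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=128)
for X, y in test_dataloader:
    print(X.shape, y.shape)  # torch.Size([128, 85]) torch.Size([128])
    break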
From index matrix to RNN
Each index corresponds to one word. Inside the model, an object created from the nn.Embedding class maps each index to a vector of dimension embedding_dim, so the batch's shape becomes [batch_size, seq_len, embedding_dim].
By convention, PyTorch batch data is laid out as [batch, number of features, feature shape]. This is the general rule: bring the data into this layout first, and only convert it inside a model when that particular model needs a different format.
For text data, the number of features C is the word-vector dimension, i.e. embedding_dim, and the feature shape is the sequence length seq_len.
With its default arguments, an RNN module cares strongly about feature order, because it literally for-loops over the features in sequence order; by default the sequence dimension sits at dim=0, so RNN input has shape [seq_len, batch_size, embedding_dim].
Before the data enters the RNN, the first two dimensions therefore have to be swapped:
[batch_size, seq_len, embedding_dim] --> [seq_len, batch_size, embedding_dim]
After that, the RNN takes over the transformations (see the sketch below).
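A minimal sketch of these two shape transitions, with illustrative sizes matching the hotel data:

import torch
from torch import nn

batch_size, seq_len, embedding_dim = 128, 85, 256
dict_len = 21437

embedding = nn.Embedding(num_embeddings=dict_len, embedding_dim=embedding_dim, padding_idx=0)

X = torch.randint(low=0, high=dict_len, size=(batch_size, seq_len))  # index matrix
x = embedding(X)                      # [batch_size, seq_len, embedding_dim]
x = torch.permute(x, dims=(1, 0, 2))  # [seq_len, batch_size, embedding_dim]
print(x.shape)                        # torch.Size([85, 128, 256])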
RNN shape transformations
Three values need to be thought through and fixed:
- the dimensionality of the hidden vector each word is mapped to inside the RNN: hidden_size
- whether the RNN is bidirectional or unidirectional: bidirectional
- the number of RNN layers: num_layers

Once these three are decided, the RNN's shape transformations are fixed (verified in the sketch below):

h0.shape = hn.shape
if bidirectional:
    hn.shape = [2*num_layers, batch_size, hidden_size]
    output.shape = [seq_len, batch_size, 2*hidden_size]
else:
    hn.shape = [num_layers, batch_size, hidden_size]
    output.shape = [seq_len, batch_size, hidden_size]
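These shape rules can be checked directly; a small sketch with illustrative sizes:

import torch
from torch import nn

seq_len, batch_size, embedding_dim, hidden_size, num_layers = 85, 128, 256, 512, 2

rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_size,
             num_layers=num_layers, bidirectional=True)

x = torch.randn(seq_len, batch_size, embedding_dim)
h0 = torch.zeros(2 * num_layers, batch_size, hidden_size)
output, hn = rnn(x, h0)
print(hn.shape)      # torch.Size([4, 128, 512])   = [2*num_layers, batch_size, hidden_size]
print(output.shape)  # torch.Size([85, 128, 1024]) = [seq_len, batch_size, 2*hidden_size]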
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from ai.datasets import load_hotel

X_train, y_train, X_test, y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]

import sys, os

class ParamConfig():
    padding_idx = 0
    embedding_dim = 256
    hidden_size = 512
    output_size = 2
    bidirectional = True
    num_layers = 2
    batch_size = 128
    debug = False
    BASE_DIR = sys.path[0]
    param_path = os.path.join(BASE_DIR, "model/model_rnn1.pkl")
    log_file = os.path.join(BASE_DIR, "main.log")

    def __init__(self, isTest=False, seq_len=seq_len) -> None:
        if isTest:  # for a quick model check, skip loading real data and use a made-up dictionary size
            self.dict_len = 10000
            self.seq_len = seq_len
            self.debug = True
        else:
            words_set, word2idx = load_hotel(return_dict=True)
            dict_len = len(words_set)
            print(f"dict_len:{dict_len}")  # dict_len:21437
            self.dict_len = dict_len
            self.word2idx = word2idx
            self.seq_len = seq_len

pm = ParamConfig()
# pm = ParamConfig(isTest=True)

class MyDataSet(Dataset):
    """Build the dataset"""
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test, y=y_test)

# Take batches from the dataset
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=pm.batch_size)
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    break

class RNNClassify1(nn.Module):
    def __init__(self, dict_len, input_size, hidden_size, output_size,
                 num_layers=2, bidirectional=True, debug=pm.debug):
        super(RNNClassify1, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=dict_len,
                                      embedding_dim=input_size,
                                      padding_idx=0)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.debug = debug
        self.model = nn.RNN(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=bidirectional)
        if bidirectional:
            self.single_rnn_nums = num_layers * 2
        else:
            self.single_rnn_nums = num_layers
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """X: 2-D index matrix"""
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        # The batch size is dynamic: whatever batch_size you configure,
        # the last (remainder) batch will usually be smaller.
        batch_size = X.shape[0]
        x = self.embedding(X)  # [B, seq_len] --> [B, seq_len, embedding]
        if self.debug:
            print(f"embedding:{x.shape}")  # embedding:torch.Size([128, 85, 256])
        x = torch.permute(input=x, dims=(1, 0, 2))  # [B, seq_len, embedding] --> [seq_len, B, embedding]
        if self.debug:
            print(f"x.shape after permute:{x.shape}")  # torch.Size([85, 128, 256])
        h0 = torch.zeros(self.single_rnn_nums, batch_size, self.hidden_size)
        # h0 and hn are the two ends of a unidirectional RNN chain:
        # the initial vector at the first word and the output vector at the last word.
        out, hn = self.model(x, h0)
        # In the RNN view, the last word's output summarizes the whole sequence,
        # i.e. the last word's output is also the sequence's output,
        # so each hidden_size-length vector in hn is one sequence's context vector.
        # With batch_size sequences there should be batch_size such vectors,
        # so hn has to be reduced to shape [batch_size, hidden_size].
        # Summing hn over dim=0 removes that dimension, which is exactly what is needed.
        # dim=0 of hn indexes the unidirectional RNN chains; summing fuses the last-word
        # outputs of all chains. Concatenating them is the other common choice.
        out = torch.sum(input=hn, dim=0)
        # Fully connected classifier
        out = self.fc(out)
        return out

model = RNNClassify1(dict_len=pm.dict_len,
                     input_size=pm.embedding_dim,
                     hidden_size=pm.hidden_size,
                     output_size=pm.output_size,
                     num_layers=pm.num_layers,
                     bidirectional=pm.bidirectional)

# Quick shape check with random indices (dummy data, not the real test set)
X_demo = torch.randint(low=0, high=pm.dict_len, size=(pm.batch_size, pm.seq_len))
y_out = model(X_demo)
print(y_out.shape)  # torch.Size([128, 2])

# Define the loss function
loss_fn = nn.CrossEntropyLoss()
from ai.dl import T

T.train(model=model, loss_fn=loss_fn, optimizer="adam",
        continuation=True, is_regression=False,
        learning_rate=1e-3, epochs=10, auto_save=True,
        train_dataset=train_dataset, test_dataset=test_dataset,
        model_param_path=pm.param_path, log_file=pm.log_file)
A model with better results
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# Load the dataset
from ai.datasets import load_hotel

X_train, y_train, X_test, y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]

import sys, os

class ParamConfig():
    padding_idx = 0
    embedding_dim = 256
    hidden_size = 512
    output_size = 2
    bidirectional = True
    num_layers = 2
    batch_size = 128
    debug = False
    BASE_DIR = sys.path[0]
    param_path = os.path.join(BASE_DIR, "model/model_gru8.pkl")
    log_file = os.path.join(BASE_DIR, "main.log")

    def __init__(self, isTest=False, seq_len=seq_len) -> None:
        if isTest:  # for a quick model check, skip loading real data and use a made-up dictionary size
            self.dict_len = 10000
            self.seq_len = seq_len
            self.debug = True
        else:
            words_set, word2idx = load_hotel(return_dict=True)
            dict_len = len(words_set)
            print(f"dict_len:{dict_len}")  # dict_len:21437
            self.dict_len = dict_len
            self.word2idx = word2idx
            self.seq_len = seq_len

pm = ParamConfig()
# pm = ParamConfig(isTest=True)

class MyDataSet(Dataset):
    """Build the dataset"""
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return torch.tensor(data=x).long(), torch.tensor(data=y).long()

train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test, y=y_test)

# Take batches from the dataset
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=pm.batch_size)
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    break

class RNNClassify1(nn.Module):
    def __init__(self, dict_len, input_size, hidden_size, output_size,
                 num_layers=2, bidirectional=True, debug=pm.debug):
        super(RNNClassify1, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=dict_len,
                                      embedding_dim=input_size,
                                      padding_idx=0)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.debug = debug
        self.model = nn.GRU(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=bidirectional)
        if bidirectional:
            self.single_rnn_nums = num_layers * 2
        else:
            self.single_rnn_nums = num_layers
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """X: 2-D index matrix"""
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        # The batch size is dynamic: whatever batch_size you configure,
        # the last (remainder) batch will usually be smaller.
        batch_size = X.shape[0]
        x = self.embedding(X)  # [B, seq_len] --> [B, seq_len, embedding]
        if self.debug:
            print(f"embedding:{x.shape}")  # embedding:torch.Size([128, 85, 256])
        x = torch.permute(input=x, dims=(1, 0, 2))  # [B, seq_len, embedding] --> [seq_len, B, embedding]
        if self.debug:
            print(f"x.shape after permute:{x.shape}")  # torch.Size([85, 128, 256])
        h0 = torch.zeros(self.single_rnn_nums, batch_size, self.hidden_size)
        # h0 and hn are the two ends of a unidirectional RNN chain:
        # the initial vector at the first word and the output vector at the last word.
        out, hn = self.model(x, h0)
        # In the RNN view, the last word's output summarizes the whole sequence,
        # so each hidden_size-length vector in hn is one sequence's context vector;
        # summing hn over dim=0 (the unidirectional chains) would give [batch_size, hidden_size],
        # as in the RNN model above:
        # out = torch.sum(input=hn, dim=0)
        # Here the full output is used instead: add the forward and backward halves of the
        # bidirectional output, then sum over the sequence dimension.
        out = out[:, :, self.hidden_size:] + out[:, :, :self.hidden_size]
        out = torch.sum(input=out, dim=0)
        # Fully connected classifier
        out = self.fc1(out)
        out = self.fc2(out)
        return out

model = RNNClassify1(dict_len=pm.dict_len,
                     input_size=pm.embedding_dim,
                     hidden_size=pm.hidden_size,
                     output_size=pm.output_size,
                     num_layers=pm.num_layers,
                     bidirectional=pm.bidirectional)

# Define the loss function
loss_fn = nn.CrossEntropyLoss()

from ai.dl import T
T.train(model=model, loss_fn=loss_fn, optimizer="adam",
        continuation=True, is_regression=False,
        learning_rate=1e-3, epochs=20, auto_save=True,
        train_dataset=train_dataset, test_dataset=test_dataset,
        model_param_path=pm.param_path, log_file=pm.log_file)
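The key change relative to the plain RNN model is how the bidirectional output is fused: the last dimension of out holds the forward and backward directions back to back, so splitting it in half and adding the halves gives one hidden_size-sized vector per time step. A small sketch of that step, with illustrative sizes:

import torch
from torch import nn

seq_len, batch_size, embedding_dim, hidden_size, num_layers = 85, 128, 256, 512, 2

gru = nn.GRU(input_size=embedding_dim, hidden_size=hidden_size,
             num_layers=num_layers, bidirectional=True)
x = torch.randn(seq_len, batch_size, embedding_dim)
out, hn = gru(x)

print(out.shape)  # torch.Size([85, 128, 1024]): forward and backward halves concatenated

fused = out[:, :, hidden_size:] + out[:, :, :hidden_size]  # [seq_len, batch_size, hidden_size]
pooled = torch.sum(fused, dim=0)                           # [batch_size, hidden_size]
print(fused.shape, pooled.shape)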
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# Load the dataset
from ai.datasets import load_hotel

X_train, y_train, X_test, y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]

import sys, os

# ParamConfig (with param_path = "model/model_gru8.pkl") and the GRU-based RNNClassify1
# are defined exactly as in the training script above.

pm = ParamConfig()

model = RNNClassify1(dict_len=pm.dict_len,
                     input_size=pm.embedding_dim,
                     hidden_size=pm.hidden_size,
                     output_size=pm.output_size,
                     num_layers=pm.num_layers,
                     bidirectional=pm.bidirectional)

# Restore the trained weights
model.load_state_dict(torch.load(pm.param_path))

Output:
X_train: (4800, 85)
y_train: (4800,)
X_test: (1200, 85)
y_test: (1200,)
dict_len:21437
Inspecting the parameters
params = model.parameters()
for param in params:
    print(param)

Output (abridged; one Parameter per weight/bias tensor of the embedding, the GRU layers, and the two linear layers):

Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.1802,  0.7505, -0.9388,  ..., -0.3928, -1.3043, -1.4117],
        [ 0.5218,  0.7069, -0.5760,  ...,  0.1971, -0.6769,  1.0947],
        ...,
        [-1.4839, -1.3945,  1.2294,  ..., -0.5314, -0.2000, -1.2279],
        [ 0.0224,  0.4936,  0.7235,  ..., -0.7298,  0.0284,  1.5092],
        [ 1.3206,  0.3973,  1.6314,  ...,  1.1834, -1.9032, -0.9954]],
       requires_grad=True)
...
Parameter containing:
tensor([[-0.0298,  0.0013,  0.0063,  ...,  0.0177,  0.0143, -0.0130],
        [-0.0134,  0.0035,  0.0215,  ...,  0.0220,  0.0280, -0.0165]],
       requires_grad=True)
Parameter containing:
tensor([ 0.0375, -0.0276], requires_grad=True)

Note that the first row of the embedding weight is all zeros: it is the vector for padding_idx=0, which nn.Embedding keeps at zero.
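To identify which parameter is which, model.named_parameters() can be used instead; a short sketch:

for name, param in model.named_parameters():
    print(name, tuple(param.shape))
# e.g. embedding.weight (21437, 256), model.weight_ih_l0 (1536, 256), ...,
#      fc2.weight (2, 512), fc2.bias (2,)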
Calling the model directly
model(torch.tensor(X_test))

Output:
tensor([[ -2.6753,   1.8119],
        [  9.7845,  -9.0205],
        [-19.9369,  24.6413],
        ...,
        [ 18.5353, -21.7781],
        [-12.1830,   8.3670],
        [  4.7882,  -2.9590]], grad_fn=<AddmmBackward0>)
Conversion tip
a = torch.tensor([[ -2.6753,   1.8119],
                  [  9.7845,  -9.0205],
                  [-19.9369,  24.6413]])
a.argmax(dim=1)

Output:
tensor([1, 0, 1])

This is a positive/negative review binary classification, with the negative class as 1, matching the class indices 0 and 1.
Normally a softmax would be applied to turn the outputs into probabilities, but softmax is monotonic, so the largest probability falls on the same index as the largest raw value.
It is therefore simpler to take the index of the maximum value directly (checked in the sketch below).
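This can be verified directly: applying softmax does not change which index is largest.

import torch
from torch.nn import functional as F

a = torch.tensor([[ -2.6753,   1.8119],
                  [  9.7845,  -9.0205],
                  [-19.9369,  24.6413]])

probs = F.softmax(a, dim=1)   # probabilities per row
print(probs.argmax(dim=1))    # tensor([1, 0, 1])
print(a.argmax(dim=1))        # tensor([1, 0, 1]) -- same result without softmax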
Prediction helper
def predict(model, X=X_test):
    y_out = model(torch.tensor(X))
    return y_out.argmax(dim=1)

y_out = predict(model=model, X=X_test[:3])
y_out

Output:
tensor([1, 0, 1])
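For pure inference it is usually worth switching the model to eval mode and disabling gradient tracking. A sketch of such a variant (predict_no_grad is a name introduced here, not from the original code; this model has no dropout or batch norm, so eval() mainly signals intent):

def predict_no_grad(model, X):
    model.eval()             # switch off training-specific behavior
    with torch.no_grad():    # no autograd graph is built during inference
        y_out = model(torch.tensor(X))
    return y_out.argmax(dim=1)

print(predict_no_grad(model, X_test[:3]))  # expected: tensor([1, 0, 1]), matching the call above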