我有一个基本模型,其中手动创建训练数据的批次,如下所示。
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import tqdm
import copy
import random
import torch
import torch.nn as nn
import torch.optim as optim
SEED = 12345
BATCH_SIZE= 5
N_EPOCHS = 100
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
dataset = load_iris()
X , y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=True)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1,1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1,1)
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.linear_relu_stack = nn.Sequential(
nn.Linear(len(dataset.feature_names), 10),
nn.LeakyReLU(),
nn.Linear(10, 20),
nn.LeakyReLU(),
nn.Linear(20, 8),
nn.LeakyReLU(),
nn.Linear(8, 1)
)
def forward(self, x):
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork()
batch_start = torch.arange(0, len(X_train), BATCH_SIZE)
best_mse = np.inf
best_weights = None
history = []
def train(model, loss_fn, optimizer):
model.train()
with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
bar.set_description(f"Epoch {epoch}")
for start in bar:
X_batch = X_train[start:start + BATCH_SIZE]
y_batch = y_train[start:start + BATCH_SIZE]
y_pred = model(X_batch)
loss = loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
bar.set_postfix(mse=float(loss))
def test(model, loss_fn):
global best_mse
global best_weights
model.eval()
y_pred = model(X_test)
mse = loss_fn(y_pred, y_test)
mse = float(mse)
history.append(mse)
if mse < best_mse:
best_mse = mse
best_weights = copy.deepcopy(model.state_dict())
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
for epoch in range(N_EPOCHS):
train(model, loss_fn, optimizer)
test(model, loss_fn)
model.load_state_dict(best_weights)
print("MSE: %.2f" % best_mse)
print("RMSE: %.2f" % np.sqrt(best_mse))
plt.xlabel("Epoch")
plt.ylabel("MSE")
plt.plot(history)
plt.show()
字符串
在收敛终止于MSE = 0.07
和RMSE=0.26
时,MSE的减小如下所示
的数据
但是,当我使用dataloader自动进行批处理时,其代码如下所示
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import tqdm
import copy
import random
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
SEED = 12345
BATCH_SIZE= 5
N_EPOCHS = 100
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
dataset = load_iris()
X , y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=True)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1,1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1,1)
train_dataloader = DataLoader(list(zip(X_train, y_train)), shuffle=True, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(list(zip(X_test, y_test)), shuffle=False, batch_size=BATCH_SIZE)
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.linear_relu_stack = nn.Sequential(
nn.Linear(len(dataset.feature_names), 10),
nn.LeakyReLU(),
nn.Linear(10, 20),
nn.LeakyReLU(),
nn.Linear(20, 8),
nn.LeakyReLU(),
nn.Linear(8, 1)
)
def forward(self, x):
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork()
batch_start = torch.arange(0, len(X_train), BATCH_SIZE)
best_mse = np.inf
best_weights = None
history = []
def train(model, loss_fn, optimizer):
model.train()
with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
bar.set_description(f"Epoch {epoch}")
for start in bar:
for X_train_batch, y_train_batch in train_dataloader:
y_pred = model(X_train_batch)
loss = loss_fn(y_pred, y_train_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
bar.set_postfix(mse=float(loss))
def test(model, loss_fn):
global best_mse
global best_weights
model.eval()
y_pred = model(X_test)
mse = loss_fn(y_pred, y_test)
mse = float(mse)
history.append(mse)
if mse < best_mse:
best_mse = mse
best_weights = copy.deepcopy(model.state_dict())
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
for epoch in range(N_EPOCHS):
train(model, loss_fn, optimizer)
test(model, loss_fn)
model.load_state_dict(best_weights)
print("MSE: %.2f" % best_mse)
print("RMSE: %.2f" % np.sqrt(best_mse))
plt.xlabel("Epoch")
plt.ylabel("MSE")
plt.plot(history)
plt.show()
型
有两件事是意料之外的:
首先,第二段代码的执行速度比第一段代码慢得多,因为第二段代码使用了数据加载器。其次,训练结果在MSE = 0.02
和RMSE=0.15
处停止,现在完全不同了:
的
那么,我们能解释为什么这两个代码的结果不一样吗?
1条答案
按热度按时间eyh26e7m1#
编辑:
一些更新:
首先,在dataloader版本中,您不应该将循环嵌套在tqdm bar下(用于start in bar)。其次,您应该禁用train dataloader中的
shuffle=True
。通过上述更新,以下代码可以重现初始结果(MSE=0.07,RMSE-0.26)。字符串
现在让我们进入细节。
***(1)为什么它会慢:***最重要的原因是你在初始的dataloader代码中实际上让模型训练了更多的迭代。你遍历了整个dataloader,在tqdm条的嵌套循环下(你最初实现的是手动遍历)。这也解释了为什么你的初始dataloader版本的结果更好。
另外,你应该知道dataloader本身也有一定的开销:首先是自动归类(在文档https://pytorch.org/docs/stable/data.html中检查
collate_fn
)。自动排序将尝试递归地预处理数据(例如,将numpy数组转换为torch.Tensor并跨批次连接它们,如果数据是嵌套的dict,则此过程是递归的)。第二个是多处理-虽然您在这里没有使用多处理,python中的多进程本身可能很棘手,通常为子进程创建worker的成本很高。这在您在多进程DataLoader中使用persistent_workers
时尤其明显,这意味着每次完成DataLoader遍历并需要再次执行时,都会重新创建子进程,并且会有更多的计算开销。在dataloader版本中,您启用了
shuffle=True
,这意味着在每个epoch中,批次的顺序和实际组合都可以随机化,这很重要(例如:https://stats.stackexchange.com/questions/245502/why-should-we-shuffle-data-while-training-a-neural-network)现在让我们验证批处理的顺序是否重要:如果在固定数据加载器示例中保留
shuffle=True
,则输出将为MSE= 0.06
和RMSE=0.25
。