非常感谢Paddle社区为开源做出的贡献!
环境
Python3.9
Paddle版本 2.6.0
cuda_12.2.r12.2/compiler.32965470_0
最小复现代码
以下是我写的 torch LSTM => Paddle LSTM 转换的函数脚本
def LSTMWithProjection_torch2paddle(lstm_paddle, lstm_torch):
    """Copy the weights of a torch LSTM-with-projection model into a Paddle one.

    The four layer-0 LSTM tensors are read from ``lstm_torch.state_dict()``
    and written under two families of Paddle keys (``lstm.<name>_l0`` and
    ``lstm.0.cell.<name>``), each key receiving its own freshly created
    tensor.  The projection weight is copied separately, transposed.

    Args:
        lstm_paddle: Paddle model exposing ``load_dict`` and ``linear.weight``.
        lstm_torch: torch model exposing ``state_dict()`` and ``linear.weight``.

    Returns:
        The same ``lstm_paddle`` object that was passed in, after loading.
    """
    torch_state = lstm_torch.state_dict()

    # (Paddle key, torch key) pairs, in the order they are inserted.
    # NOTE(review): the author reports that in lstm_paddle.state_dict() each
    # "lstm.<name>_l0" entry is the very same object as the matching
    # "lstm.0.cell.<name>" entry; both families are populated here anyway.
    key_pairs = (
        ('lstm.weight_ih_l0', 'lstm.weight_ih_l0'),
        ('lstm.weight_hh_l0', 'lstm.weight_hh_l0'),
        ('lstm.bias_ih_l0', 'lstm.bias_ih_l0'),
        ('lstm.bias_hh_l0', 'lstm.bias_hh_l0'),
        ('lstm.0.cell.weight_ih', 'lstm.weight_ih_l0'),
        ('lstm.0.cell.weight_hh', 'lstm.weight_hh_l0'),
        ('lstm.0.cell.bias_ih', 'lstm.bias_ih_l0'),
        ('lstm.0.cell.bias_hh', 'lstm.bias_hh_l0'),
    )

    paddle_state = {}
    for paddle_key, torch_key in key_pairs:
        as_numpy = torch_state[torch_key].detach().cpu().numpy()
        paddle_state[paddle_key] = paddle.to_tensor(as_numpy)
    lstm_paddle.load_dict(paddle_state)

    # The projection weight is transposed on the way over -- presumably
    # because the two frameworks store Linear weights in opposite layouts
    # (TODO confirm against the Paddle Linear documentation).
    projection = lstm_torch.linear.weight.data.cpu().numpy().T
    lstm_paddle.linear.weight.set_value(paddle.to_tensor(projection))

    return lstm_paddle
这是测试代码,补上 import(numpy as np、torch、paddle)后可以直接运行
class LSTMWithProjection_torch(torch.nn.Module):
    """Single torch LSTM followed by a bias-free linear projection.

    Args:
        input_size: size of each input feature vector fed to the LSTM.
        hidden_size: LSTM hidden size, also the projection's input size.
        proj_size: output size of the linear projection.
    """

    def __init__(self, input_size, hidden_size, proj_size):
        super().__init__()
        self.input_size, self.hidden_size, self.proj_size = (
            input_size,
            hidden_size,
            proj_size,
        )
        # The LSTM is registered before the projection, so parameter and
        # state_dict ordering is: lstm.* first, then linear.weight.
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.linear = torch.nn.Linear(
            in_features=hidden_size,
            out_features=proj_size,
            bias=False,
        )

    def forward(self, x):
        """Run ``x`` through the LSTM and project every output step."""
        # NOTE: no flatten_parameters() call is made before running the LSTM.
        sequence_out, _final_state = self.lstm(x)
        return self.linear(sequence_out)
class LSTMWithProjection(paddle.nn.Layer):
    """Single Paddle LSTM followed by a bias-free linear projection.

    Args:
        input_size: size of each input feature vector fed to the LSTM.
        hidden_size: LSTM hidden size, also the projection's input size.
        proj_size: output size of the linear projection.
    """

    def __init__(self, input_size, hidden_size, proj_size):
        super().__init__()
        self.input_size, self.hidden_size, self.proj_size = (
            input_size,
            hidden_size,
            proj_size,
        )
        # Unlike the torch counterpart, no batch_first=True argument is
        # passed here; the LSTM is built with its default layout settings.
        self.lstm = paddle.nn.LSTM(input_size, hidden_size)
        self.linear = paddle.nn.Linear(hidden_size, proj_size, bias_attr=False)

    def forward(self, x):
        """Run ``x`` through the LSTM and project every output step."""
        # NOTE: no flatten_parameters() call is made before running the LSTM.
        sequence_out, _final_state = self.lstm(x)
        return self.linear(sequence_out)
if __name__ == "__main__":
    # ---------- compare Paddle and torch outputs ----------
    input_size, hidden_size, proj_size = 80, 768, 256

    # Build both models: Paddle first, then torch (moved to the GPU).
    lstm_paddle = LSTMWithProjection(input_size, hidden_size, proj_size)
    lstm_torch = LSTMWithProjection_torch(input_size, hidden_size, proj_size).cuda()

    # Transfer the torch weights into the Paddle model.
    lstm_paddle = LSTMWithProjection_torch2paddle(lstm_paddle, lstm_torch)

    # One shared random float32 input, handed to each framework.
    x = np.random.rand(10, 250, 80).astype("float32")
    x_tc = torch.from_numpy(x).cuda()
    x_pd = paddle.to_tensor(x)

    # Workaround: with this flag left at its default, the run is reported
    # to abort with a segmentation fault.
    lstm_paddle.lstm.could_use_cudnn = False

    # 1) LSTM outputs only: largest absolute element-wise difference.
    y_pd = lstm_paddle.lstm(x_pd)[0].numpy()
    y_tc = lstm_torch.lstm(x_tc)[0].detach().cpu().numpy()
    print(np.abs(y_pd - y_tc).max())

    # 2) Full model (LSTM + projection): largest absolute difference,
    #    plus the gaps between the two outputs' means and stds.
    y_pd = lstm_paddle(x_pd).numpy()
    y_tc = lstm_torch(x_tc).detach().cpu().numpy()
    print(
        np.abs(y_pd - y_tc).max(),
        f"mean: {y_pd.mean() - y_tc.mean()}",
        f"std : {y_pd.std() - y_tc.std()}",
    )
如果不通过下面这行代码手动关闭 cuDNN,运行时就会报错:
lstm_paddle.lstm.could_use_cudnn = False
报错内容:
W0607 15:11:35.219653 1625662 rnn_kernel.cu.cc:234] If the memory space of the Input WeightList is not continuous, less efficient calculation will be called. Please call flatten_parameters() to make the input memory continuous.
--------------------------------------
C++ Traceback (most recent call last):
--------------------------------------
No stack trace in paddle, may be caused by external reasons.
----------------------
Error Message Summary:
----------------------
FatalError: `Segmentation fault` is detected by the operating system.
[TimeInfo: *** Aborted at 1717744295 (unix time) try "date -d @1717744295" if you are using GNU date ***]
[SignalInfo: *** SIGSEGV (@0x0) received by PID 1625662 (TID 0x7f1aa1cb7740) from PID 0 ***]
我有两个问题:
- Paddle 的 LSTM 为何有两套参数?是历史遗留问题吗?下面的代码会返回 4 个 True,我需要手动给这两套参数都重新加载吗,还是只需要加载第一套参数?
print(
pd_model_state_dict['lstm.weight_ih_l0'] is pd_model_state_dict['lstm.0.cell.weight_ih'],
pd_model_state_dict['lstm.weight_hh_l0'] is pd_model_state_dict['lstm.0.cell.weight_hh'],
pd_model_state_dict['lstm.bias_ih_l0'] is pd_model_state_dict['lstm.0.cell.bias_ih'],
pd_model_state_dict['lstm.bias_hh_l0'] is pd_model_state_dict['lstm.0.cell.bias_hh']
)
- 即使我手动加载了全部参数,在保存为 Paddle 参数文件(.pdparams)后再重新读取,如果不加上
lstm_paddle.lstm.could_use_cudnn = False
这个flag,还是会报一样的段错误
其他补充信息 Additional Supplementary Information
No response
5条答案
按热度按时间ldxq2e6h1#
torch版本是多少,我先本地复现一下
j13ufse22#
@lizexu123
cwtwac6a3#
你上面的cuda版本不是12.2吗
new9mtju4#
是CUDA12,但我当时装的就是1.13.0+cu117 😂
感觉和torch关系不大,主要是paddle的参数读取内存不连续问题?
ccrfmcuu5#
我对齐一下torch和cuda版本试试