我在学整洁模特。下面的代码运行良好:
library(tidyverse)
library(tidymodels)
# Draw a random sample of 2000 to try the models
set.seed(1234)
diamonds <- diamonds %>%
sample_n(2000)
diamonds_split <- initial_split(diamonds, prop = 0.80, strata="price")
diamonds_train <- training(diamonds_split)
diamonds_test <- testing(diamonds_split)
folds <- rsample::vfold_cv(diamonds_train, v = 10, strata="price")
metric <- metric_set(rmse,rsq,mae)
# Model KNN
knn_spec <-
nearest_neighbor(
mode = "regression",
neighbors = tune("k"),
engine = "kknn"
)
knn_rec <-
recipe(price ~ ., data = diamonds_train) %>%
step_log(all_outcomes()) %>%
step_normalize(all_numeric_predictors()) %>%
step_dummy(all_nominal_predictors())
knn_wflow <-
workflow() %>%
add_model(knn_spec) %>%
add_recipe(knn_rec)
knn_grid = expand.grid(k=c(1,5,10,30))
knn_res <-
tune_grid(
knn_wflow,
resamples = folds,
metrics = metric,
grid = knn_grid
)
collect_metrics(knn_res)
autoplot(knn_res)
show_best(knn_res,metric="rmse")
# Best KNN
best_knn_spec <-
nearest_neighbor(
mode = "regression",
neighbors = 10,
engine = "kknn"
)
best_knn_wflow <-
workflow() %>%
add_model(best_knn_spec) %>%
add_recipe(knn_rec)
best_knn_fit <- last_fit(best_knn_wflow, diamonds_split)
collect_metrics(best_knn_fit)
但是,当我试图在训练集上拟合最佳模型并将其应用于测试集时,我遇到了问题。下面两行给予了错误:“step_log()
中的错误:!步骤'log_mUSAb'中的new_data
缺少以下必需列:运行rlang::last_trace()
查看错误发生的位置。”
# Predict Manually
f1 = fit(best_knn_wflow,diamonds_train)
p1 = predict(f1,new_data=diamonds_test)
1条答案
按热度按时间wfveoks01#
这个问题与TidyModels工作流中的日志转换结果变量有关
对于结果的日志转换,我们强烈建议在将它们传递给
recipe()
之前完成这些转换。这是因为在对新数据进行预测时(这是当您last_fit()
工作流时发生的事情),您不能保证有结果。食谱失败了。你在这里看到的是,当你预测一个
workflow()
对象时,它只传递预测器,因为它只需要传递预测器。这就是为什么你会看到这个错误。因为日志转换不是一个学习的转换,所以您可以安全地在之前完成它。