按月计算SMAPE的R函数有什么问题？

我是一个用R编写函数的新手，我正在尝试计算我的一个模型的每月对称平均绝对误差（SMAPE）性能。基本函数可以工作，但计算数据集中每个月的单个值，而不是不同的值。下面是一个可重现的示例：

structure(list(date = structure(c(18948, 18949, 18950, 18951, 
18952, 18953, 18954, 18955, 18956, 18957, 18958, 18959, 18960, 
18961, 18962, 18963, 18964, 18965, 18966, 18967, 18968, 18969, 
18970, 18971, 18972, 18973, 18974, 18975, 18976, 18977, 18978, 
18979, 18980, 18981, 18982, 18983, 18984, 18985, 18986, 18987, 
18988, 18989, 18990, 18991, 18992, 18993, 18994, 18995, 18996, 
18997, 18998, 18999, 19000, 19001, 19002, 19003, 19004, 19005, 
19006, 19007, 19008, 19009, 19010, 19011, 19012, 19013, 19014, 
19015, 19016, 19017, 19018, 19019, 19020, 19021, 19022, 19023, 
19024, 19025, 19026, 19027, 19028, 19029, 19030, 19031, 19032, 
19033, 19034, 19035, 19036, 19037, 19038, 19039, 19040, 19041, 
19042, 19043), class = "Date"), actual = c(2875, 2755, 2440, 
2220, 1378, 1352, 2616, 1709, 1475, 2315, 2223, 4357, 3037, 1725, 
2332, 2358, 3135, 3232, 3497, 2876, 2971, 3530, 4268, 4692, 3589, 
3496, 4233, 4336, 5810, 6943, 8921, 7491, 8607, 10450, 11309, 
13367, 18607, 23426, 19244, 29256, 21001, 27023, 29346, 39840, 
41210, 37503, 38473, 35618, 40713, 39363, 43142, 44309, 38706, 
34988, 33483, 28847, 32719, 31248, 31502, 19896, 19025, 23586, 
20977, 22323, 23900, 22966, 15038, 14283, 15827, 13900, 18274, 
18325, 17514, 10616, 8828, 10580, 8888, 15072, 14208, 14426, 
7815, 6841, 7257, 8003, 11034, 10637, 10189, 6143, 4401, 5911, 
6164, 8030, 10151, 4180, 6929, 3377), consensus2 = c(2899, 2735, 
2485, 2199, 1297, 1414, 3026, 1535, 1588, 2435, 2341, 3095, 2241, 
2480, 3098, 2513, 2886, 3289, 3427, 3060, 3050, 3564, 3803, 4204, 
3188, 3184, 4071, 4063, 4974, 5839, 6641, 6146, 6620, 8446, 11112, 
13071, 14963, 18807, 20670, 21149, 22824, 28484, 29376, 31969, 
37669, 37706, 42511, 39104, 41362, 44855, 48043, 46670, 40384.96296, 
42612.53704, 37730, 38351, 33813, 35651, 31475, 19364, 19364, 
19892, 20436, 21114, 21221, 23002, 18035, 15320, 16292, 15735, 
14726, 17844, 17635.77778, 11904.48148, 10763.7037, 9986.611111, 
9986.611111, 10604.22222, 14246.90741, 14113.55556, 9113.425926, 
8236.5, 8759.888889, 7436.462963, 10489.37037, 10507.09259, 9969.5, 
5272.111111, 5729.092593, 5989.055556, 6245, 8267.314815, 7844.481481, 
3176.703704, 8661.944444, 3320.055556)), row.names = c(NA, -96L
), class = c("tbl_df", "tbl", "data.frame"))


library(lubridate)
library(tidyverse)

 data<- data %>% dplyr::select (date, actual, consensus2) %>% 
 dput() 

data$month<- lubridate::month(data$date,label = TRUE)
data<- data %>% mutate(month= as.factor(month))

#Function

smape1 <- function(a, f)  {for (i in 1:(nlevels(data$month))) { return (1/length(a) * sum(2*abs(f-a) / (abs(a)+abs(f))*100))}} 

SMAPE_bymonth<- by(data,data$month, function(a,f)smape1(data$actual,data$consensus2))
 
SMAPE_bymonth

不清楚smape1函数中的for循环。如果我们删除该循环并创建一个带有两个参数（a、f）的函数，该函数从数据中获取列，那么我们只需要按“month”分组，并通过选择这些列来应用函数

library(dplyr)
smape2 <- function(a, f) 
    {

    return(1/length(a) * sum(2*abs(f-a) / (abs(a)+abs(f))*100))
}
data %>%
    group_by(month) %>% 
    summarise(smape = smape2(actual, consensus2), .groups = 'drop')
# A tibble: 4 × 2
  month smape
  <ord> <dbl>
1 Jan    8.87
2 Feb   12.1 
3 Nov   11.3 
4 Dec   12.0

或者使用by，lambda函数function(x)从第一个参数返回分组的块data，在提取列“actual”、“consensus2”而不是从整个数据（data$）提取列“actual”、“consensus2”之后，将其用作输入参数

by(data, droplevels(data$month), function(x) smape2(x$actual,x$consensus2))
droplevels(data$month): Jan
[1] 8.870074
----------------------------------------------------------------------------------------------------------------------- 
droplevels(data$month): Feb
[1] 12.05893
----------------------------------------------------------------------------------------------------------------------- 
droplevels(data$month): Nov
[1] 11.26306
----------------------------------------------------------------------------------------------------------------------- 
droplevels(data$month): Dec
[1] 11.96994

按月计算SMAPE的R函数有什么问题？

1条答案

相关问题

热门标签

最新问答