我是一个用R编写函数的新手,我正在尝试计算我的一个模型的每月对称平均绝对误差(SMAPE)性能。基本函数可以工作,但计算数据集中每个月的单个值,而不是不同的值。下面是一个可重现的示例:
structure(list(date = structure(c(18948, 18949, 18950, 18951,
18952, 18953, 18954, 18955, 18956, 18957, 18958, 18959, 18960,
18961, 18962, 18963, 18964, 18965, 18966, 18967, 18968, 18969,
18970, 18971, 18972, 18973, 18974, 18975, 18976, 18977, 18978,
18979, 18980, 18981, 18982, 18983, 18984, 18985, 18986, 18987,
18988, 18989, 18990, 18991, 18992, 18993, 18994, 18995, 18996,
18997, 18998, 18999, 19000, 19001, 19002, 19003, 19004, 19005,
19006, 19007, 19008, 19009, 19010, 19011, 19012, 19013, 19014,
19015, 19016, 19017, 19018, 19019, 19020, 19021, 19022, 19023,
19024, 19025, 19026, 19027, 19028, 19029, 19030, 19031, 19032,
19033, 19034, 19035, 19036, 19037, 19038, 19039, 19040, 19041,
19042, 19043), class = "Date"), actual = c(2875, 2755, 2440,
2220, 1378, 1352, 2616, 1709, 1475, 2315, 2223, 4357, 3037, 1725,
2332, 2358, 3135, 3232, 3497, 2876, 2971, 3530, 4268, 4692, 3589,
3496, 4233, 4336, 5810, 6943, 8921, 7491, 8607, 10450, 11309,
13367, 18607, 23426, 19244, 29256, 21001, 27023, 29346, 39840,
41210, 37503, 38473, 35618, 40713, 39363, 43142, 44309, 38706,
34988, 33483, 28847, 32719, 31248, 31502, 19896, 19025, 23586,
20977, 22323, 23900, 22966, 15038, 14283, 15827, 13900, 18274,
18325, 17514, 10616, 8828, 10580, 8888, 15072, 14208, 14426,
7815, 6841, 7257, 8003, 11034, 10637, 10189, 6143, 4401, 5911,
6164, 8030, 10151, 4180, 6929, 3377), consensus2 = c(2899, 2735,
2485, 2199, 1297, 1414, 3026, 1535, 1588, 2435, 2341, 3095, 2241,
2480, 3098, 2513, 2886, 3289, 3427, 3060, 3050, 3564, 3803, 4204,
3188, 3184, 4071, 4063, 4974, 5839, 6641, 6146, 6620, 8446, 11112,
13071, 14963, 18807, 20670, 21149, 22824, 28484, 29376, 31969,
37669, 37706, 42511, 39104, 41362, 44855, 48043, 46670, 40384.96296,
42612.53704, 37730, 38351, 33813, 35651, 31475, 19364, 19364,
19892, 20436, 21114, 21221, 23002, 18035, 15320, 16292, 15735,
14726, 17844, 17635.77778, 11904.48148, 10763.7037, 9986.611111,
9986.611111, 10604.22222, 14246.90741, 14113.55556, 9113.425926,
8236.5, 8759.888889, 7436.462963, 10489.37037, 10507.09259, 9969.5,
5272.111111, 5729.092593, 5989.055556, 6245, 8267.314815, 7844.481481,
3176.703704, 8661.944444, 3320.055556)), row.names = c(NA, -96L
), class = c("tbl_df", "tbl", "data.frame"))
library(lubridate)
library(tidyverse)
data<- data %>% dplyr::select (date, actual, consensus2) %>%
dput()
data$month<- lubridate::month(data$date,label = TRUE)
data<- data %>% mutate(month= as.factor(month))
#Function
smape1 <- function(a, f) {for (i in 1:(nlevels(data$month))) { return (1/length(a) * sum(2*abs(f-a) / (abs(a)+abs(f))*100))}}
SMAPE_bymonth<- by(data,data$month, function(a,f)smape1(data$actual,data$consensus2))
SMAPE_bymonth
1条答案
按热度按时间3pmvbmvn1#
不清楚
smape1
函数中的for
循环。如果我们删除该循环并创建一个带有两个参数(a
、f
)的函数,该函数从数据中获取列,那么我们只需要按“month”分组,并通过选择这些列来应用函数或者使用
by
,lambda函数function(x)
从第一个参数返回分组的块data
,在提取列“actual”、“consensus2”而不是从整个数据(data$
)提取列“actual”、“consensus2”之后,将其用作输入参数