如何在R中正确使用GAM方法进行时间序列插值

yc0p9oo0  于 2023-06-27  发布在  其他
关注(0)|答案(1)|浏览(77)

我有如下数据:

# Complete example series: 19 acquisitions for one geometry (geom_id) at a
# fixed lon/lat. acqdate is stored as "dd.mm.yyyy" text, not as Date, and
# b02..b06 are integer columns.
mydata <- structure(
  list(
    geom_id = rep(5482620L, 19L),
    lon = rep(77.72114, 19L),
    lat = rep(19.36754, 19L),
    acqdate = c(
      "01.05.2022", "06.05.2022", "11.05.2022", "16.05.2022", "21.05.2022",
      "26.05.2022", "31.05.2022", "05.06.2022", "10.06.2022", "15.06.2022",
      "20.06.2022", "25.06.2022", "30.06.2022", "05.07.2022", "10.07.2022",
      "15.07.2022", "20.07.2022", "25.07.2022", "30.07.2022"
    ),
    b02 = c(
      488L, 539L, 1653L, 16566L, 295L, 527L, 529L, 24330L, 233L, 1452L,
      8063L, 3571L, 3685L, 5543L, 7317L, 4964L, 4358L, 8296L, 12930L
    ),
    b03 = c(
      684L, 691L, 1653L, 13535L, 646L, 684L, 742L, 19647L, 522L, 1426L,
      7677L, 2871L, 3202L, 5503L, 7328L, 4508L, 3637L, 7716L, 10320L
    ),
    b04 = c(
      866L, 954L, 1811L, 11540L, 908L, 860L, 918L, 16673L, 754L, 1437L,
      7358L, 2748L, 2911L, 5517L, 7393L, 4212L, 3293L, 7451L, 8774L
    ),
    b05 = c(
      945L, 1064L, 1990L, 12704L, 1096L, 1027L, 1007L, 17654L, 898L, 1660L,
      7860L, 2597L, 3135L, 5067L, 7205L, 4822L, 3716L, 8391L, 9021L
    ),
    b06 = c(
      1092L, 1165L, 2147L, 11628L, 1188L, 1129L, 1021L, 16135L, 1007L, 1756L,
      7380L, 2614L, 3121L, 5353L, 7346L, 4620L, 4034L, 8377L, 7877L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -19L)
)

我想按日期(acqdate 列)对数据进行插值,每次需要补全3个时间步。上面给出的是完整数据,它是一个多变量时间序列;我需要对其中连续缺失的3个日期进行插值。
为此,我想使用以下库(以便使用GAM方法):

library(mgcv)
  mgcv::gam()

gam_interp(
   formula = NULL,
   y,
   time,
   predict_times,
   se.fit = T,
   s_args = NULL,
   uncertainty_type,
   verbose = F
)

但我遇到了困难:我不明白如何正确地使用GAM方法对时间序列中接下来的3个时间步进行插值(据我理解,该公式(formula)只适用于线性回归?)
下面是我把需要插值的行清空之后的数据:

dput()

# Same 19-row layout as the complete data, but only every fourth row is kept:
# the rows in between have an empty acqdate string and NA in b02..b06.
structure(
  list(
    geom_id = rep(5482620L, 19L),
    lon = rep(77.72114, 19L),
    lat = rep(19.36754, 19L),
    acqdate = c(
      "01.05.2022", "", "", "",
      "21.05.2022", "", "", "",
      "10.06.2022", "", "", "",
      "30.06.2022", "", "", "",
      "20.07.2022", "", ""
    ),
    b02 = c(
      488L, NA, NA, NA, 295L, NA, NA, NA, 233L, NA,
      NA, NA, 3685L, NA, NA, NA, 4358L, NA, NA
    ),
    b03 = c(
      684L, NA, NA, NA, 646L, NA, NA, NA, 522L, NA,
      NA, NA, 3202L, NA, NA, NA, 3637L, NA, NA
    ),
    b04 = c(
      866L, NA, NA, NA, 908L, NA, NA, NA, 754L, NA,
      NA, NA, 2911L, NA, NA, NA, 3293L, NA, NA
    ),
    b05 = c(
      945L, NA, NA, NA, 1096L, NA, NA, NA, 898L, NA,
      NA, NA, 3135L, NA, NA, NA, 3716L, NA, NA
    ),
    b06 = c(
      1092L, NA, NA, NA, 1188L, NA, NA, NA, 1007L, NA,
      NA, NA, 3121L, NA, NA, NA, 4034L, NA, NA
    )
  ),
  class = "data.frame",
  row.names = c(NA, -19L)
)

应如何正确设置公式中的各项,才能用GAM方法恢复时间序列中的数据(即必须通过插值补全NA)?
谢谢你的帮助。

0tdrvxhp

0tdrvxhp1#

要使用gam_interp(),我认为您需要用标准化(等间隔)的时间序列来填充predict_times参数,例如:

# acqdate holds "dd.mm.yyyy" strings. Parse them to date-times first:
# min()/max() on the raw strings compare lexicographically (so "31.05.2022"
# would sort after "30.07.2022"), and seq() cannot step over character values.
acq_time <- as.POSIXct(mydata$acqdate, format = "%d.%m.%Y", tz = "UTC")

# Regular prediction grid spanning the observed period. Empty date strings
# parse to NA, hence na.rm = TRUE. A coarser step such as "1 day" yields a
# much smaller grid when minute resolution is not needed.
seq(min(acq_time, na.rm = TRUE), max(acq_time, na.rm = TRUE), by = "1 min")

其中 y = mydata$b02,time = mydata$acqdate

相关问题