R语言 连接数据框和数据框(作为列表?)或创建新的变量

utugiqy6  于 2023-04-27  发布在  其他
关注(0)|答案(3)|浏览(86)

我的代码看起来像这样:

# Dataset 1 

df1 <- data.frame(date = c("2022-12-31", "2022-09-30","2022-06-30", "2022-03-31"))

### Dataset 2 

df2 <- data.frame(date = c("2022-12-31", "2022-09-30","2022-06-30", "2022-03-31"), pe_ratio = c(22.0,22.6,22.5,28.2))

### What I want 

df3 <- data.frame(date = c("2022-12-31", "2022-09-30","2022-06-30", "2022-03-31"))
df3$pe_ratio_minus_1 <- NA
df3$pe_ratio_minus_2 <- NA
df3$pe_ratio_minus_3 <- NA
df3$pe_ratio_minus_1[1] <- df2$pe_ratio[2] 
df3$pe_ratio_minus_2[1] <- df2$pe_ratio[3]
df3$pe_ratio_minus_3[1] <- df2$pe_ratio[4]

> df3
        date pe_ratio_minus_1 pe_ratio_minus_2 pe_ratio_minus_3
1 2022-12-31             22.6             22.5             28.2
2 2022-09-30               NA               NA               NA
3 2022-06-30               NA               NA               NA
4 2022-03-31               NA               NA               NA

所以我想做的是对于df 1中的日期变量,我想为pe_ratio的df2中的每个值创建一个单独的变量,用于df1中的先前值。但不应包含df1中的实际日期值。

wz3gfoph

wz3gfoph1#

我想这是你想要的,但是它的缺失值比你的结果少……

library(dplyr)
df1 %>% 
  left_join(df2, by = "date") %>%
  arrange(desc(date)) %>% # make sure things are in order
  mutate(
    pe_ratio_minus_1 = lead(pe_ratio, 1),
    pe_ratio_minus_2 = lead(pe_ratio, 2),
    pe_ratio_minus_3 = lead(pe_ratio, 3)
  ) %>%
  select(-pe_ratio)
#         date pe_ratio_minus_1 pe_ratio_minus_2 pe_ratio_minus_3
# 1 2022-12-31             22.6             22.5             28.2
# 2 2022-09-30             22.5             28.2               NA
# 3 2022-06-30             28.2               NA               NA
# 4 2022-03-31               NA               NA               NA
vsdwdz23

vsdwdz232#

使用data.table

library(data.table)
 setDT(df1)[df2, on = .(date)][order(-date),
  paste0("pe_ratio_minus_", 1:3) := shift(pe_ratio, n = 1:3, type = "lead")][]
  • 输出
date pe_ratio pe_ratio_minus_1 pe_ratio_minus_2 pe_ratio_minus_3
1: 2022-12-31     22.0             22.6             22.5             28.2
2: 2022-09-30     22.6             22.5             28.2               NA
3: 2022-06-30     22.5             28.2               NA               NA
4: 2022-03-31     28.2               NA               NA               NA
62o28rlo

62o28rlo3#

  • base* 的另一个变体使用outer来放置match艾德日期。
df1[paste0("pe_ratio_minus_", 1:3)] <- 
  df2$pe_ratio[match(df1$date, df2$date)[outer(seq_len(nrow(df1)), 1:3, `+`)]]
df1
#        date pe_ratio_minus_1 pe_ratio_minus_2 pe_ratio_minus_3
#1 2022-12-31             22.6             22.5             28.2
#2 2022-09-30             22.5             28.2               NA
#3 2022-06-30             28.2               NA               NA
#4 2022-03-31               NA               NA               NA

相关问题