R语言 连接数据框和数据框或创建新的变量

xurqigkl  于 2023-04-27  发布在  其他
关注(0)|答案(2)|浏览(70)

这是我的数据:

### Data 1
df_1 <- data.frame(date = as.Date(c("2022-10-27","2022-07-28", "2022-04-28")))
df_1
### Data 2
df_2 <- data.frame(date = as.Date(c("2022-09-12","2022-06-13", "2022-03-15","2022-01-15","2021-10-21")), ratio = c(2.12,4.43,1.23,9.89,7.50))
df_2

### What I want to have:

df_3 <- data.frame(date = as.Date(c("2022-10-27","2022-07-28", "2022-04-28" )), 
                   ratio_past_1 =c(2.12,4.43,1.23), 
                   ratio_past_2 = c(4.43,1.23,9.89), 
                   ratio_past_3 = c(1.23,9.89,7.50),
                   ratio_past_4 = c(9.89,7.50,NA), 
                   ratio_past_5 = c(7.50,NA,NA))
df_3

我所知道的:

  1. df_1中的日期总是大于df_2中的日期
  2. df_1 date_i与df_2 date_i中的周期相同
    我想要的:
    1.将df_1中的日期作为df_3中的日期
    1.根据df_1中日期变量之后df_2(比率)中的观察结果创建新变量
wgx48brx

wgx48brx1#

更新2:第二个建议:这是一个更好的有自己的功能.但我不喜欢硬编码ratio_past_1:ratio_past_5

library(dplyr)
# lookup vector
vector_lookup <- c("period1" = 1:4, "period2" = 5:8, "period3" = 9:12)

# define function
add_period_column <- function(df) {
  df %>%
    mutate(month = month(ymd(date))) %>% 
    mutate(
      period = case_when(
        month %in% vector_lookup[period1] ~ "period1",
        month %in% vector_lookup[period2] ~ "period2",
        month %in% vector_lookup[period3] ~ "period3",
        TRUE ~ NA_character_
      )
    )
}

# create list of data frames
df_list <- list(df_1, df_2)

df_list_period <- lapply(df_list, add_period_column)

# join data frames
df_join <- Reduce(function(x, y) left_join(x, y, by = "period"), df_list_period)
df_join %>% 
  mutate(ratio_past_1 = ratio,
         ratio_past_2 = lead(ratio,1),
         ratio_past_3 = lead(ratio,2),
         ratio_past_4 = lead(ratio,3),
         ratio_past_5 = lead(ratio,4)
  ) %>% 
  select(date = date.x, contains("_"))

更新:第一次尝试。这里我们创建一个命名向量作为查找资源。接下来的事情是我们定义我们的句点并通过period连接

vector_lookup <- c("period1" = 1:4, "period2" = 5:8, "period3" = 9:12)

df_1 %>%
  mutate(month = month(ymd(date))) %>% 
  mutate(
    period = case_when(
      month %in% vector_lookup[period1] ~ "period1",
      month %in% vector_lookup[period2] ~ "period2",
      month %in% vector_lookup[period3] ~ "period3",
      TRUE ~ NA_character_
    )
  ) %>% 
  left_join(df_2 %>% 
              mutate(month = month(ymd(date))) %>% 
              mutate(
                period = case_when(
                  month %in% vector_lookup[period1] ~ "period1",
                  month %in% vector_lookup[period2] ~ "period2",
                  month %in% vector_lookup[period3] ~ "period3",
                  TRUE ~ NA_character_
                )), by = "period") %>% 
  mutate(ratio_past_1 = ratio,
         ratio_past_2 = lead(ratio,1),
         ratio_past_3 = lead(ratio,2),
         ratio_past_4 = lead(ratio,3),
         ratio_past_5 = lead(ratio,4)
         ) %>% 
  select(date = date.x, contains("_"))
date ratio_past_1 ratio_past_2 ratio_past_3 ratio_past_4 ratio_past_5
1 2022-10-27         2.12         7.50         4.43         1.23         9.89
2 2022-10-27         7.50         4.43         1.23         9.89           NA
3 2022-07-28         4.43         1.23         9.89           NA           NA
4 2022-04-28         1.23         9.89           NA           NA           NA
5 2022-04-28         9.89           NA           NA           NA           NA

也许是这样的:

library(dplyr)

df_1 %>%
  mutate(x = format(as.Date(date), "%Y-%m")) %>% 
  left_join(df_2 %>% 
              mutate(x = format(as.Date(date), "%Y-%m")), by = "x") %>% 
  mutate(ratio_past_1 = ratio,
         ratio_past_2 = lead(ratio,1),
         ratio_past_3 = lead(ratio,2)) %>% 
  select(date = date.x, contains("_"))

        date ratio_past_1 ratio_past_2 ratio_past_3
1 2022-10-27         2.12         4.43         1.23
2 2022-07-28         4.43         1.23           NA
3 2022-04-28         1.23           NA           NA
gcuhipw9

gcuhipw92#

这是一个非等长连接加上一个整形/轴。

library(dplyr) # dplyr_1.1.0 or later, for `join_by` and `.by=`
df_1 %>%
  mutate(rn = row_number()) %>%
  left_join(df_2, join_by(date >= date), suffix = c("", ".y")) %>%
  mutate(cn = row_number(), .by = rn) %>%
  select(-date.y, -rn) %>%
  pivot_wider(id_cols = date, names_from = "cn", values_from = "ratio", names_prefix = "ratio_past_")
# # A tibble: 3 × 4
#   date       ratio_past_1 ratio_past_2 ratio_past_3
#   <date>            <dbl>        <dbl>        <dbl>
# 1 2022-10-27         2.12         4.43         1.23
# 2 2022-07-28         4.43         1.23        NA   
# 3 2022-04-28         1.23        NA           NA

相关问题