在R中一次旋转表多个变量

bpsygsoo  于 2023-11-14  发布在  其他
关注(0)|答案(4)|浏览(94)

我有一个 Dataframe ,看起来像这样:

df <- data.frame(residence_pre = c("house", "apartment", "house"),
                 residence_during = c("house", "apartment", "house"),
                 residence_after = c("house", "apartment", "house"),
                 family_pre = c(1,2,3),
                 family_during = c(2,2,4),
                 family_after = c(1,2,4))

字符串
我想用一种看起来像这样的方式来透视它:(这是示例数据。在真实的数据框架中有更多的列,都遵循这种模式)

pivot <- data.frame(obs.number = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
                    residence = c("house", "house", "house", "apartment", "apartment", "apartment", "house", "house", "house"),
                    family = c(1, 2, 1, 2, 2, 2, 3, 4, 4),
                    status = c("pre", "during", "after", "pre", "during", "after", "pre", "during", "after"))

3bygqnnd

3bygqnnd1#

在添加obs.number之后,我们可以在一个pivot步骤中完成:

library(tidyverse)
df |>
  mutate(obs.number = row_number()) |>
  pivot_longer(-obs.number, names_to = c(".value", "status"),
               values_to = "family", names_sep = "_")
  # values_to = "family" is to assign the values to a column called "family",
  #   instead of the default column name "value"

# A tibble: 9 × 4
  obs.number status residence family
       <int> <chr>  <chr>      <dbl>
1          1 pre    house          1
2          1 during house          2
3          1 after  house          1
4          2 pre    apartment      2
5          2 during apartment      2
6          2 after  apartment      2
7          3 pre    house          3
8          3 during house          4
9          3 after  house          4

字符串

5lwkijsr

5lwkijsr2#

这里有一个方法,你可以尝试

df2 <- df %>% mutate(obs.number=row_number()) %>% 
  pivot_longer(cols = c(residence_pre,residence_during,residence_after),
               names_to = 'status', values_to = 'residence', 
               names_prefix='residence_') %>% select(status,residence,obs.number)

df3<- df %>% mutate(obs.number=row_number()) %>% 
  pivot_longer(cols = c(family_pre,family_during,family_after),
               names_to = 'status', values_to = 'family', 
               names_prefix='family_') %>% select(status,family,obs.number)

df4 <- df2 %>% left_join(df3, by=c('status','obs.number'))

# A tibble: 9 × 4
  status residence obs.number family
  <chr>  <chr>          <int>  <dbl>
1 pre    house              1      1
2 during house              1      2
3 after  house              1      1
4 pre    apartment          2      2
5 during apartment          2      2
6 after  apartment          2      2
7 pre    house              3      3
8 during house              3      4
9 after  house              3      4

字符串

p3rjfoxz

p3rjfoxz3#

如果数据集的其余部分确实使用了相同的列模式,那么类似这样的方法可能会起作用-我们首先将数据集按列分成部分(residencefamily),分别透视部分,然后将left_join()应用于结果tibble列表中,直到purrr::reduce()

library(dplyr)
library(tidyr)
library(purrr)
library(stringr)

df <- data.frame(residence_pre = c("house", "apartment", "house"),
                 residence_during = c("house", "apartment", "house"),
                 residence_after = c("house", "apartment", "house"),
                 family_pre = c(1,2,3),
                 family_during = c(2,2,4),
                 family_after = c(1,2,4))

# pivot a single section, a set of related columns in df
pivot_section <- function(df_){
  mutate(df_, obs.number = row_number(), .before = 1) %>% 
    pivot_longer(cols =  -obs.number, names_sep = "_", names_to = c(".value", "status"))
}

# collect available sections (residence, family)
sections <- names(df) %>% str_split_i("_", 1) %>% unique()
sections
#> [1] "residence" "family"

# organize columns to separate frames by sections (residence, family),
# pivot every section frame,
# join all sections by obs.number and status
map(sections, \(sect) select(df, starts_with(sect))) %>% 
  map(pivot_section) %>% 
  reduce(left_join, by = join_by(obs.number, status))

字符串
测试结果:

#> # A tibble: 9 × 4
#>   obs.number status residence family
#>        <int> <chr>  <chr>      <dbl>
#> 1          1 pre    house          1
#> 2          1 during house          2
#> 3          1 after  house          1
#> 4          2 pre    apartment      2
#> 5          2 during apartment      2
#> 6          2 after  apartment      2
#> 7          3 pre    house          3
#> 8          3 during house          4
#> 9          3 after  house          4


创建于2023-10-28使用reprex v2.0.2

d7v8vwbk

d7v8vwbk4#

考虑基数R到reshape两个 * residence * 和 * family * 集合,然后merge通过 * id * 和 * status * 字段:

df_list <- lapply(
    c("residence", "family"),
    \(x) reshape(
        df[grep(x, colnames(df))],
        varying = colnames(df)[grep(x, colnames(df))],
        times = gsub(paste0(x, "_"), "", colnames(df)[grep(x, colnames(df))]),
        v.names = x,
        timevar = "status",
        direction = "long",
        sep = "_",
        new.row.names = 1:1E5
   )
) 

final_df <- Reduce(
    \(x,y) merge(x, y, by=c("id", "status")), df_list
)

final_df
#   id status residence family
# 1  1  after     house      1
# 2  1 during     house      2
# 3  1    pre     house      1
# 4  2  after apartment      2
# 5  2 during apartment      2
# 6  2    pre apartment      2
# 7  3  after     house      4
# 8  3 during     house      4
# 9  3    pre     house      3

字符串
Online Demo

相关问题