如何在使用janitor::adorn_percents()时从总计列和总计行中排除百分比

ijxebb2r  于 2023-02-06  发布在  其他
关注(0)|答案(3)|浏览(85)

有什么方法可以直接从装饰函数中得到下面的输出吗?

library(janitor)
library(stringr)

df <- mtcars %>%
  tabyl(am, cyl) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns(position = "front") 

df
#     am           4          6           8        Total
#      0  3 (15.79%) 4 (21.05%) 12 (63.16%) 19 (100.00%)
#      1  8 (61.54%) 3 (23.08%)  2 (15.38%) 13 (100.00%)
#  Total 11 (34.38%) 7 (21.88%) 14 (43.75%) 32 (100.00%)

df$Total <- str_replace(df$Total, " \\s*\\([^\\)]+\\)", "")
df[df$am == "Total",] <- str_replace(df[df$am == "Total",], " \\s*\\([^\\)]+\\)", "")

df
#     am           4          6           8 Total
#      0  3 (15.79%) 4 (21.05%) 12 (63.16%)    19
#      1  8 (61.54%) 3 (23.08%)  2 (15.38%)    13
#  Total          11          7          14    32
jm2pwxwz

jm2pwxwz1#

这不仅是janitor的解决方案,而且是使用dyplrreadr的一次运行的解决方案:
我们在代码中添加一行mutate(across...,其中case_when条件仅针对特定行,并且(技巧)使用parse_number(自动提取第一个数字)。第二步是对Total列使用parse_number

library(janitor)
library(readr)
library(dplyr)
mtcars %>%
  tabyl(am, cyl) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("row") %>% 
  adorn_pct_formatting(digits = 2) %>% 
  adorn_ns(position = "front") %>% 
  mutate(across(-c(am, Total), ~case_when(am == "Total" ~as.character(parse_number(.)),
                                          TRUE ~.))) %>% 
  mutate(Total = parse_number(Total))
am           4          6           8 Total
     0  3 (15.79%) 4 (21.05%) 12 (63.16%)    19
     1  8 (61.54%) 3 (23.08%)  2 (15.38%)    13
 Total          11          7          14    32
kq0g1dla

kq0g1dla2#

从本质上讲,问题是在创建百分比后要调用adorn_totals(),但不能这样做,因为这样做是在处理具有"3 (15.79%)"这样的值的字符列,不能对它们求和。
我只需要创建一个函数来计算一个数据框中的总数和另一个数据框中的百分比,并将它们连接在一起:

library(dplyr)
library(janitor)

create_formatted_totals <- function(rows, cols, dat) {
    dat_pct <- dat |>
        tabyl({{ rows }}, {{ cols }}) |>
        adorn_percentages() |>
        adorn_pct_formatting() |>
        adorn_ns(position = "front")

    totals <- dat |>
        tabyl({{ rows }}, {{ cols }}) |>
        adorn_totals(c("row", "col")) |>
        mutate(across(everything(), as.character))

    # Add row totals
    dat_pct$Total <- head(totals$Total, -1)

    # Add col totals
    dat_pct <- rbind(dat_pct, tail(totals, 1))

    return(dat_pct)
}

然后,您可以执行以下操作:

create_formatted_totals(am, cyl, mtcars)
#     am         4         6          8 Total
#      0 3 (15.8%) 4 (21.1%) 12 (63.2%)    19
#      1 8 (61.5%) 3 (23.1%)  2 (15.4%)    13
#  Total        11         7         14    32
xjreopfe

xjreopfe3#

我们可以在一些adorn函数中使用整理选择选项

library(dplyr)
library(janitor)
mtcars %>%
  tabyl(am, cyl) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("row", `...` = -c(am, Total)) %>%  
  adorn_pct_formatting(digits = 2, `...` = -c(am, Total)) %>% 
  adorn_ns(position = "front", `...` = -c(am, Total)) %>% 
  mutate(across(-c(am, Total), 
   ~ replace(.x, n(), readr::parse_number(.x[n()]))))
  • 输出
am           4          6           8 Total
     0  3 (15.79%) 4 (21.05%) 12 (63.16%)    19
     1  8 (61.54%) 3 (23.08%)  2 (15.38%)    13
 Total          11          7          14    32

或者使用group_modify

mtcars %>% 
  tabyl(am, cyl) %>%
  adorn_totals(c("row", "col")) %>% 
  group_by(grp = replace(am, am != 'Total', 'Cell')) %>% 
  group_modify(~ if(.y$grp != "Total") .x %>% 
      adorn_percentages("row", `...` = -c(am, Total)) %>%  
      adorn_pct_formatting(digits = 2, `...` = -c(am, Total)) %>% 
      adorn_ns(position = "front", `...` = -c(am, Total)) else 
    .x %>% 
      mutate(across(-Total, as.character))) %>% 
      ungroup %>%
      select(-grp)
  • 输出
# A tibble: 3 × 5
  am    `4`        `6`        `8`         Total
  <chr> <chr>      <chr>      <chr>       <dbl>
1 0     3 (15.79%) 4 (21.05%) 12 (63.16%)    19
2 1     8 (61.54%) 3 (23.08%) 2 (15.38%)     13
3 Total 11         7          14             32

相关问题