在R中追加对选定行求和的汇总行

hfwmuf9z  于 2023-07-31  发布在  其他
关注(0)|答案(3)|浏览(71)

在R中工作,我有一个数据框df,它显示每个年级的学生人数。

# Example data: one row per grade level with its enrolled student count.
# Built directly with structure() as a tibble-classed data frame
# (tbl_df / tbl / data.frame) with 16 automatic row names.
df <- structure(
  list(
    Grade = c(
      "PK3", "PK4", "KG",
      paste("Grade", 1:12), # "Grade 1" through "Grade 12"
      "Ungraded"
    ),
    Enrolled = c(
      4967, 6481, 7378, 7041, 6760, 6590, 6473, 6191,
      5790, 5693, 5614, 7254, 4951, 4250, 3792, 238
    )
  ),
  row.names = c(NA, -16L),
  class = c("tbl_df", "tbl", "data.frame")
)

我想在末尾添加两行:一个显示K-12年级的总注册人数,另一个显示PK-12年级的总注册人数。我使用{janitor}包编写了代码来实现这一点,但这段代码似乎不必要地冗长和复杂。有没有更好的方法来告诉代码追加一行来对第3行到第16行求和,再追加一行来对第1行到第16行求和?

# Step 1 - K-12 total: drop the two pre-K rows, have janitor append a totals
# row named "k_12_total", keep only that row, and attach it to `df`.
df2 <- df %>%
  filter(Grade != "PK3", Grade != "PK4") %>%
  adorn_totals(where = "row", name = "k_12_total") %>%
  filter(Grade == "k_12_total")
df <- rbind(df, df2)

# Step 2 - PK-12 total: exclude the K-12 totals row added above so it is not
# counted twice, total the remaining rows, and attach that totals row too.
df2 <- df %>%
  filter(Grade != "k_12_total") %>%
  adorn_totals(where = "row", name = "pk_12_total") %>%
  filter(Grade == "pk_12_total")
df <- rbind(df, df2)
pwuypxnk

pwuypxnk1#

就像这样:

library(dplyr)
library(janitor)

# Append a K-12 totals row and a PK-12 totals row to `df` in one pipeline.
# Each adorn_totals() call returns its input rows plus one totals row, so the
# grade rows get bound in more than once; distinct() collapses those copies,
# leaving the original rows followed by the two totals rows.
# NOTE: distinct() would also merge any genuinely duplicated rows in `df`.
df %>%
  bind_rows(
    # K-12: every grade whose label does not contain "PK".
    df %>%
      filter(!grepl("PK", Grade)) %>%
      adorn_totals(where = "row", name = "k_12_total"),
    # PK-12: all rows. `df` holds no "k_12_total" row at this point, so no
    # filter on that name is needed before totalling.
    df %>%
      adorn_totals(where = "row", name = "pk_12_total")
  ) %>%
  distinct()
# A tibble: 18 × 2
   Grade       Enrolled
   <chr>          <dbl>
 1 PK3             4967
 2 PK4             6481
 3 KG              7378
 4 Grade 1         7041
 5 Grade 2         6760
 6 Grade 3         6590
 7 Grade 4         6473
 8 Grade 5         6191
 9 Grade 6         5790
10 Grade 7         5693
11 Grade 8         5614
12 Grade 9         7254
13 Grade 10        4951
14 Grade 11        4250
15 Grade 12        3792
16 Ungraded         238
17 k_12_total     78015
18 pk_12_total    89463
cqoc49vn

cqoc49vn2#

执行两次adorn_totals()会使数据量膨胀,之后还需要额外调用distinct()来删除重复的行。这样既消耗内存,效率也低。
您可以使用以下方法:

library(dplyr)
library(tidyr)

# Compute both totals in a single summarise() call (one column per total),
# reshape that one-row result into Grade / Enrolled pairs, and append the
# two resulting rows below the original `df`.
bind_rows(
  df,
  df %>%
    summarise(
      k_12_total = sum(Enrolled[!grepl("PK", Grade)]),
      pk_12_total = sum(Enrolled)
    ) %>%
    pivot_longer(
      cols = everything(),
      names_to = "Grade",
      values_to = "Enrolled"
    )
)

# # A tibble: 18 × 2
#    Grade       Enrolled
#    <chr>          <dbl>
#  1 PK3             4967
#  2 PK4             6481
#  3 KG              7378
#  4 Grade 1         7041
#  5 Grade 2         6760
#  6 Grade 3         6590
#  7 Grade 4         6473
#  8 Grade 5         6191
#  9 Grade 6         5790
# 10 Grade 7         5693
# 11 Grade 8         5614
# 12 Grade 9         7254
# 13 Grade 10        4951
# 14 Grade 11        4250
# 15 Grade 12        3792
# 16 Ungraded         238
# 17 k_12_total     78015
# 18 pk_12_total    89463
ars1skjm

ars1skjm3#

也可以直接用rbind()追加两个单行数据框(data frame),每个数据框包含您想要的总和。假设你已经知道相应的行索引,例如:

# Append both totals using base R only. The row positions are hard-coded:
# rows 3-16 feed the K-12 total and rows 1-16 feed the PK-12 total.
rbind(
  df,
  data.frame(
    Grade = c("k_12_total", "pk_12_total"),
    Enrolled = c(sum(df$Enrolled[3:16]), sum(df$Enrolled[1:16]))
  )
)
#>          Grade Enrolled
#> 1          PK3     4967
#> 2          PK4     6481
#> 3           KG     7378
#> 4      Grade 1     7041
#> 5      Grade 2     6760
#> 6      Grade 3     6590
#> 7      Grade 4     6473
#> 8      Grade 5     6191
#> 9      Grade 6     5790
#> 10     Grade 7     5693
#> 11     Grade 8     5614
#> 12     Grade 9     7254
#> 13    Grade 10     4951
#> 14    Grade 11     4250
#> 15    Grade 12     3792
#> 16    Ungraded      238
#> 17  k_12_total    78015
#> 18 pk_12_total    89463

相关问题