根据R中的组对行中的计数求和

6jjcrrmo  于 2023-02-10  发布在  其他
关注(0)|答案(1)|浏览(152)

我有一个如下所示的数据框(data frame)。

# A tibble: 6 x 5
  Year  District  Sex   Age_gr       Count
  <chr> <chr>     <chr> <fct>        <dbl>
1 2022  District1 Male  " \"0-4\""    3361
2 2022  District1 Male  " \"10-14\""  3604
3 2022  District1 Male  " \"15-19\""  3427
4 2022  District1 Male  " \"20-24\""  4173
5 2022  District1 Male  " \"25-29\""  5554
6 2022  District1 Male  " \"30-34\""  5780

数据中共有30个地区,我想把以5岁为间隔的年龄组合并成范围更大的年龄段,而不是保留5岁一组的划分。我在另一个问题中找到了卡米尔写的一个有用的脚本:

# Asker's attempt (kept as posted; it does not run as written).
# Intent: collapse the 5-year Age_gr levels into wider bands, then sum Count
# per collapsed band.
# NOTE(review): fct_collapse() below is called without a factor as its first
# argument, which matches the reported error `argument ".f" is missing`.
# NOTE(review): several level strings further down start with \" and have no
# opening quote (e.g. the 25-29, 40-44 and 55-59 entries), so they are not
# valid string literals.
df <-
df %>%
  mutate(Age_gr = as.factor(Age_gr), 
forcats::fct_collapse(" \"0-4\""   = c(" \"0-4\""),
                                     " \"5-14\"" = c(" \"5-9\"", 
                                     " \"10-14\""),
                      " \"15-29\"" = c(" \"15-19\"",  " \"20-24\"",
                                       \"25-29\""), 
                      " \"30-44\"" = c(" \"30-34\"", " \"35-39\"", 
                                       \"40-44\""), 
                      " \"45-64\"" = c(" \"45-49\"", " \"50-54\"", 
                                       \"55-59\"", " \"60-64\""), 
                      " \"65+\""   = c(" \"65-69\"", " \"70-74\"", 
                                       " \"75-79\"", " \"80-84\"", 
                                       " \"85-89\"", " \"90+\""))) %>%
  group_by(Age_gr) %>%
  summarise(Count = sum(Count))

但是,我遇到了下面的错误:

Error in `mutate()`:
i In argument: `forcats::fct_collapse(...)`.
Caused by error in `check_factor()`:
! argument ".f" is missing, with no default
Run `rlang::last_error()` to see where the error occurred.

 rlang::last_error()
<error/dplyr:::mutate_error>
Error in `mutate()`:
i In argument: `forcats::fct_collapse(...)`.
Caused by error in `check_factor()`:
! argument ".f" is missing, with no default
---
Backtrace:
  1. ... %>% summarise(Sayi = sum(Sayi))
 11. forcats::fct_collapse(...)
 12. forcats:::check_factor(.f)
Run `rlang::last_trace()` to see the full context.

 rlang::last_trace()
<error/dplyr:::mutate_error>
Error in `mutate()`:
i In argument: `forcats::fct_collapse(...)`.
Caused by error in `check_factor()`:
! argument ".f" is missing, with no default
---
Backtrace:
     x
  1. +-... %>% summarise(Sayi = sum(Sayi))
  2. +-dplyr::summarise(., Sayi = sum(Sayi))
  3. +-dplyr::group_by(., Yas_gr)
  4. +-dplyr::mutate(...)
  5. +-dplyr:::mutate.data.frame(...)
  6. | \-dplyr:::mutate_cols(.data, dplyr_quosures(...), by)
  7. |   +-base::withCallingHandlers(...)
  8. |   \-dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
  9. |     \-mask$eval_all_mutate(quo)
 10. |       \-dplyr (local) eval()
 11. +-forcats::fct_collapse(...)
 12. | \-forcats:::check_factor(.f)
 13. \-base::.handleSimpleError(...)
 14.   \-dplyr (local) h(simpleError(msg, call))
 15.     \-rlang::abort(message, class = error_class, parent = parent, call = error_call)
b1zrtrql

b1zrtrql1#

fct_collapse 的第一个参数必须是字符向量或因子,而不是数据框,因此不能通过管道(pipe)直接把数据框传给它;应当在 mutate() 语句中调用它,并把因子列作为它的第一个参数传入。

# Template only: call fct_collapse() inside mutate() with the factor column
# (Age_gr) as its first argument. [REST_OF_CODE] is a placeholder for the
# `new level = c(old levels)` mappings and must be filled in before running.
df %>%
  mutate(Age_gr = fct_collapse(Age_gr, [REST_OF_CODE]))

也有一些错误是由于缺少括号造成的。我建议不要在这些取值中使用双引号,因为这会让代码很难编写。你可以像我下面所做的那样改用单引号。
这能得到你想要的结果吗?

library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.1.3
#> Warning: package 'ggplot2' was built under R version 4.1.3
#> Warning: package 'tibble' was built under R version 4.1.3
#> Warning: package 'tidyr' was built under R version 4.1.3
#> Warning: package 'readr' was built under R version 4.1.3
#> Warning: package 'purrr' was built under R version 4.1.3
#> Warning: package 'dplyr' was built under R version 4.1.3
#> Warning: package 'stringr' was built under R version 4.1.3
#> Warning: package 'forcats' was built under R version 4.1.3

# Rebuild the six-row example tibble from the question.
# Age_gr is a factor that carries all 19 five-year age bands as levels even
# though only six of them occur in these rows; the labels keep their leading
# space and single quotes.
df <- local({
  age_levels <- c(
    " '0-4'", " '5-9'", " '10-14'", " '15-19'",
    " '20-24'", " '25-29'", " '30-34'", " '35-39'",
    " '40-44'", " '45-49'", " '50-54'", " '55-59'",
    " '60-64'", " '65-69'", " '70-74'", " '75-79'",
    " '80-84'", " '85-89'", " '90+'"
  )
  # Positions (within age_levels) of the bands present in the sample rows.
  observed <- c(1L, 3L, 4L, 5L, 6L, 7L)
  n_rows <- length(observed)

  structure(
    list(
      Year = rep("2022", n_rows),
      District = rep("District1", n_rows),
      Sex = rep("Male", n_rows),
      Age_gr = factor(age_levels[observed], levels = age_levels),
      Count = c(3361, 3604, 3427, 4173, 5554, 5780)
    ),
    row.names = c(NA, -n_rows),
    class = c("tbl_df", "tbl", "data.frame")
  )
})

df
#> # A tibble: 6 x 5
#>   Year  District  Sex   Age_gr     Count
#>   <chr> <chr>     <chr> <fct>      <dbl>
#> 1 2022  District1 Male  " '0-4'"    3361
#> 2 2022  District1 Male  " '10-14'"  3604
#> 3 2022  District1 Male  " '15-19'"  3427
#> 4 2022  District1 Male  " '20-24'"  4173
#> 5 2022  District1 Male  " '25-29'"  5554
#> 6 2022  District1 Male  " '30-34'"  5780

# Collapse the 5-year age bands into wider bands, stored in a new factor
# column `new_fct` (the original Age_gr column is kept).
# Every old level listed here must match an existing level of Age_gr exactly,
# including the leading space: a string such as "'25-29'" (no space) is
# reported by fct_collapse() as an unknown level, and the real " '25-29'"
# level is then left uncollapsed as its own group.
df <- df %>%
  mutate(Age_gr = as.factor(Age_gr)) %>%
  mutate(new_fct = forcats::fct_collapse(Age_gr,
    " '0-4'" = c(" '0-4'"),
    " '5-14'" = c(
      " '5-9'",
      " '10-14'"
    ),
    " '15-29'" = c(
      " '15-19'", " '20-24'",
      " '25-29'"
    ),
    " '30-44'" = c(
      " '30-34'", " '35-39'",
      " '40-44'"
    ),
    " '45-64'" = c(
      " '45-49'", " '50-54'",
      " '55-59'", " '60-64'"
    ),
    " '65+'" = c(
      " '65-69'", " '70-74'",
      " '75-79'", " '80-84'",
      " '85-89'", " '90+'"
    )
  ))

# Total the counts within each collapsed age band. Bands with no rows in the
# sample data (45-64 and 65+) do not appear in the result.
df %>%
  group_by(new_fct) %>%
  summarise(Count = sum(Count))
#> # A tibble: 4 x 2
#>   new_fct    Count
#>   <fct>      <dbl>
#> 1 " '0-4'"    3361
#> 2 " '5-14'"   3604
#> 3 " '15-29'" 13154
#> 4 " '30-44'"  5780

创建于2023年2月7日,使用reprex v2.0.2
https://forcats.tidyverse.org/reference/fct_collapse.html

相关问题