R语言:是否有方法在geom_point中按每个组绘制百分比(%)?

igetnqfo  于 2023-06-03  发布在  其他
关注(0)|答案(1)|浏览(233)

我想在X轴上绘制每个治疗分配组中出现不同疾病的患者百分比。然而,使用下面的代码,我只能得到每个治疗分配组中出现每种疾病的患者的绝对数量。例如:目前图中显示的是“分配到TB治疗组的患者中有5名存在贫血”,而我需要的是“分配到TB治疗组的患者中有14%存在贫血”。

# Asker's attempt: dodge one point per (disease, treatment arm) and scale the
# computed count to a percentage.
# Fixes relative to the posted snippet:
#   * `ggplot(mydata[, aes(...` had an unclosed `[` and did not parse; the data
#     frame is now passed as the first argument.
#   * `..count..` is the deprecated spelling of `after_stat(count)`.
# NOTE: 102 is a single fixed denominator applied to every treatment arm, so
# the plotted values are not per-arm percentages.
ggplot(
  mydata,
  aes(
    x = disease2,
    y = after_stat(count) / 102 * 100,
    colour = treatment_allocation
  )
) +
  geom_point(
    aes(shape = treatment_allocation),
    stat = "count",
    position = "dodge",
    size = 4
  ) +
  # Diseases end up on the vertical axis, percentages on the horizontal one.
  coord_flip() +
  labs(x = "", y = "% of patients") +
  scale_colour_manual(
    values = c("grey", "goldenrod3", "deepskyblue3", "seagreen3")
  ) +
  # Tick marks every 5 percentage points from 0 to 95.
  scale_y_continuous(breaks = seq(0, 95, by = 5)) +
  scale_shape_manual(values = c(15, 16, 17, 18))

数据为:

# Example data set referenced as `mydata` by the plotting code in this post
# (looks like dput() output).
# Three factor columns, each stored as integer codes plus a `.Label` vector:
#   sae_safety_coordinator - levels AE, SAE, AR, SAR, UAR, SUSAR
#   treatment_allocation   - four treatment-arm labels; two of them span a
#                            line break inside the string literal, so those
#                            labels contain an embedded newline
#   disease2               - 48 diagnosis labels; the codes include one NA
# The object carries the classes "data.table" and "data.frame" and declares
# 247 rows through the compact row.names form c(NA, -247L).
mydata <- structure(list(
  sae_safety_coordinator = structure(c(
    2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L
  ), .Label = c(
    "AE", "SAE", "AR", "SAR",
    "UAR", "SUSAR"
  ), class = "factor"), treatment_allocation =
    structure(c(
      2L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 1L, 1L,
      1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L,
      4L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 3L, 1L, 4L, 3L, 2L,
      1L, 1L, 3L, 2L, 4L, 4L, 1L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 3L, 3L,
      3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 1L,
      1L, 1L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 4L,
      3L, 4L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 4L,
      3L, 3L, 3L, 2L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 3L,
      4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
      3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 3L, 2L,
      4L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L,
      4L, 4L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 1L, 1L,
      1L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 4L,
      2L, 2L, 2L, 1L, 2L, 2L, 2L, 3L, 4L, 4L, 1L, 1L
    ), .Label = c("Standard
of Care", "TB-Treatment", "Valganciclovir", "TB-Treatment +
Valganciclovir"), class = "factor"),
  disease2 = structure(c(
    41L, 41L, 36L, 42L, 19L, 10L, 11L,
    10L, 4L, 11L, 41L, 28L, 42L, 26L, 41L, 2L, 15L, 28L, 42L,
    19L, 19L, 2L, 11L, 34L, 41L, 42L, 15L, 11L, 12L, 42L, 41L,
    19L, 42L, 41L, 19L, 42L, 41L, 8L, 19L, 4L, 19L, 28L, 26L,
    42L, 42L, 42L, 43L, 48L, 41L, 42L, 37L, 27L, 41L, 33L, 41L,
    28L, 8L, 36L, 12L, 43L, 2L, 41L, 43L, 42L, 41L, 41L, 42L,
    42L, 41L, 33L, 12L, 37L, 42L, 41L, 36L, 41L, 41L, 12L, 28L,
    41L, 38L, 12L, 41L, 37L, 42L, 2L, 41L, 28L, 28L, 11L, 28L,
    1L, 41L, 35L, 36L, 26L, 41L, 41L, 10L, 48L, 42L, 2L, 42L,
    29L, 8L, 4L, 41L, 4L, 42L, 2L, 41L, 42L, 37L, 11L, 41L, 41L,
    42L, 41L, 8L, 11L, 46L, 26L, 41L, 28L, 44L, 2L, 36L, 42L,
    41L, 48L, 41L, 11L, 42L, 10L, 41L, 41L, 28L, 11L, 41L, 25L,
    11L, 27L, 42L, 11L, 42L, 36L, 43L, 42L, 28L, 42L, 42L, 3L,
    12L, 41L, 2L, 36L, 28L, 3L, 42L, 28L, 28L, 41L, 35L, 35L,
    43L, 11L, 33L, 37L, 21L, 41L, 42L, 19L, 28L, 2L, 42L, 9L,
    41L, 47L, 36L, 41L, 41L, 42L, 39L, 2L, 41L, 41L, 40L, 41L,
    26L, 11L, 19L, 4L, 26L, 41L, 13L, 11L, 41L, 47L, 2L, 19L,
    36L, 42L, 38L, 41L, 36L, 41L, 42L, 26L, NA, 42L, 11L, 41L,
    19L, 11L, 3L, 26L, 42L, 37L, 5L, 28L, 28L, 32L, 19L, 41L,
    19L, 13L, 11L, 20L, 33L, 5L, 42L, 4L, 42L, 19L, 41L, 28L,
    19L, 42L, 41L, 11L, 41L, 37L, 42L, 40L, 41L, 24L, 28L
  ), .Label = c(
    "Nosocomial condition",
    "Unattended death", "IRIS", "Malaria", "Acidosis", "Abnormality of albumin",
    "Hypo-osmolality and hyponatremia", "Elevated transaminases",
    "Lymphocytopenia", "Thrombocytopenia", "Anemia", "Neutropenia",
    "Leukopenia", "Sickle-cell disease without crisis", "Skin eruption",
    "Scabies", "Hookworm disease, unspecified", "Injury of kidney",
    "Sepsis", "Urinary tract infection, site not specified",
    "Acute kidney failure, unspecified", "Balanitis", "Unspecified hemorrhoids",
    "Necrotizing ulcerative stomatitis", "Hemorrhagic condition",
    "Nutritional marasmus", "HIV complications", "Gastroenteritis and colitis",
    "Abdominal pain", "Ascariasis", "Candidiasis", "Congenital malformation of heart, unspecified",
    "Cardiac failure and arrhythmia", "Disseminated intravascular coagulation",
    "Other secondary pulmonary hypertension", "Respiratory failure, unspecified",
    "Pneumonia due to COVID-19", "Pneumonitis due to inhalation of food and vomit",
    "Aspiration of fluid complication", "Pneumothorax, unspecified",
    "Pneumonia, unspecified organism", "Tuberculosis", "Upper respiratory infection",
    "Otitis media", "Mucopurulent conjunctivitis", "Unspecified viral encephalitis",
    "Bacterial meningitis, unspecified", "Disorders of nervous system"
  ), class = "factor")
), class = c("data.table", "data.frame"), row.names = c(NA, -247L))
qnyhuwrf

qnyhuwrf1#

大多数情况下,先在ggplot2之外整理数据框(data frame),再进行绘图会更容易。在这里,我先汇总数据,得到每种疾病的计数以及每个治疗分配组的总计数。然后,只需计算比例count/total_count,即可在ggplot2中绘制:

# Plot, for every disease, the percentage of records within each treatment
# arm. The data are aggregated first so that ggplot2 only has to draw the
# precomputed values. Requires dplyr and ggplot2 to be attached.
mydata %>%
  # One row per (treatment arm, disease) pair. sae_safety_coordinator is
  # deliberately not a grouping key: grouping by it would split a disease's
  # count inside one arm into several rows, producing several points with
  # smaller percentages instead of a single per-arm percentage.
  count(treatment_allocation, disease2, name = "count") %>%
  # Denominator: total number of records in the treatment arm. Rows with a
  # missing disease2 stay in the counts, so they contribute to the total.
  group_by(treatment_allocation) %>%
  mutate(total_count = sum(count)) %>%
  # Drop the grouping so no grouped data frame leaks into later steps.
  ungroup() %>%
  ggplot(
    aes(
      x = disease2,
      y = 100 * count / total_count,
      colour = treatment_allocation
    )
  ) +
  geom_point(aes(shape = treatment_allocation), size = 4) +
  # Diseases end up on the vertical axis, percentages on the horizontal one.
  coord_flip() +
  labs(x = "", y = "% of patients") +
  scale_colour_manual(
    values = c("grey", "goldenrod3", "deepskyblue3", "seagreen3")
  )

输出:

相关问题