R语言 在给定x处添加两条回归线截距之间延伸的垂直线段

rryofs0p  于 2023-01-18  发布在  其他
关注(0)|答案(2)|浏览(114)

我想在 days == 0 处添加一条垂直线段:从 x == 0 组在 days == 0 处的 y 值,延伸到 x == 1 组在 days == 0 处的 y 值。

# Example data for the plots below: a 120-row tibble with a numeric outcome
# `y`, a two-level factor `x` (levels "0" and "1"), and a numeric running
# variable `days`. The literal looks like dput() output; keep it verbatim so
# the example stays reproducible.
df <- structure(list(y = c(3, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 
4, 3, 3, 4, 3, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 5, 4, 4, 4, 
5, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 4, 5, 5, 4, 4, 4, 4, 
5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 6, 6, 6, 7, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6), x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", 
"1"), class = "factor"), days = c(-8, -50, -84, -91, -69, -87, 
-89, -19, -61, -18, -46, -26, -35, -51, -88, -55, -36, -44, -24, 
-45, -78, -41, -38, -81, -74, -22, -82, -86, -39, -64, -66, -58, 
-25, -5, -29, -34, -30, -75, -57, -37, -32, -77, -31, -59, -67, 
-83, -70, -1, -65, -15, -27, -56, -71, -80, -12, -3, -76, -54, 
-52, -6, 35, 20, 53, 61, 43, 71, 88, 31, 17, 85, 21, 25, 16, 
46, 45, 41, 15, 48, 72, 63, 24, 12, 83, 40, 13, 10, 11, 79, 81, 
64, 38, 59, 3, 77, 39, 26, 68, 49, 87, 69, 75, 33, 34, 76, 78, 
86, 14, 36, 0, 44, 54, 58, 18, 80, 82, 89, 56, 2, 28, 74)), row.names = c(NA, 
-120L), class = c("tbl_df", "tbl", "data.frame"))
# https://evalf20.classes.andrewheiss.com/example/rdd/
library(tidyverse)

# Discontinuity plot: jittered points coloured by `x`, a separate linear fit
# on each side of the cutoff (days == 0), and a vertical rule at the cutoff.
df %>%
  ggplot(aes(x = days, y = y, color = x)) +
  # Fixed jitter seed keeps the point positions reproducible between runs.
  geom_point(size = 2, alpha = 0.5, position = position_jitter(seed = 42)) +
  # Fit each side of the cutoff on its own subset so the two lines get
  # independent slopes and intercepts.
  geom_smooth(data = filter(df, days < 0), method = "lm") +
  geom_smooth(data = filter(df, days >= 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Days from cutoff", y = "Outcome") +
  # `guides(color = FALSE)` is deprecated in ggplot2; "none" is the supported
  # way to drop the colour legend.
  guides(color = "none")

每条线的斜率可以变化,因此我们不能假设 x == 0(左侧)中 days == 0 处的 y 值总是位于 y 的最小值处(如图所示)。

iqjalb3h

iqjalb3h1#

您可以提取geom_smooth回归线的截距,并将其作为geom_segment添加到图中:

library(tidyverse)

# Base plot: jittered points coloured by `x`, one linear fit per side of the
# cutoff (days == 0), and a vertical rule at the cutoff.
gg <- df %>%
  ggplot(aes(x = days, y = y, color = x)) +
  geom_point(size = 2, alpha = 0.5, position = position_jitter(seed = 42)) +
  geom_smooth(data = filter(df, days < 0), method = "lm") +
  geom_smooth(data = filter(df, days >= 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Days from cutoff", y = "Outcome") +
  guides(color = "none")

# Refit the same two models that geom_smooth() draws so their coefficients
# are available as numbers.
lm_neg <- lm(y ~ days, data = filter(df, days < 0))
lm_pos <- lm(y ~ days, data = filter(df, days >= 0))

# The cutoff sits at days == 0, so each model's intercept is its fitted value
# at the cutoff. Look the intercept up by name rather than by position.
#
# annotate() draws exactly one segment. geom_segment(aes(<constants>)) would
# inherit the plot data and overplot one identical segment per row.
gg +
  annotate(
    "segment",
    x = 0, xend = 0,
    y = coef(lm_neg)[["(Intercept)"]],
    yend = coef(lm_pos)[["(Intercept)"]],
    colour = "yellow", size = 2
  )

mum43rcc

mum43rcc2#

# Example data for the plot below: a 120-row tibble with a numeric outcome
# `y`, a two-level factor `x` (levels "0" and "1"), and a numeric running
# variable `days`. The literal looks like dput() output; keep it verbatim so
# the example stays reproducible.
df <- structure(list(y = c(3, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 
4, 3, 3, 4, 3, 3, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 5, 4, 4, 4, 
5, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 4, 5, 5, 4, 4, 4, 4, 
5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 6, 6, 6, 7, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6), x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", 
"1"), class = "factor"), days = c(-8, -50, -84, -91, -69, -87, 
-89, -19, -61, -18, -46, -26, -35, -51, -88, -55, -36, -44, -24, 
-45, -78, -41, -38, -81, -74, -22, -82, -86, -39, -64, -66, -58, 
-25, -5, -29, -34, -30, -75, -57, -37, -32, -77, -31, -59, -67, 
-83, -70, -1, -65, -15, -27, -56, -71, -80, -12, -3, -76, -54, 
-52, -6, 35, 20, 53, 61, 43, 71, 88, 31, 17, 85, 21, 25, 16, 
46, 45, 41, 15, 48, 72, 63, 24, 12, 83, 40, 13, 10, 11, 79, 81, 
64, 38, 59, 3, 77, 39, 26, 68, 49, 87, 69, 75, 33, 34, 76, 78, 
86, 14, 36, 0, 44, 54, 58, 18, 80, 82, 89, 56, 2, 28, 74)), row.names = c(NA, 
-120L), class = c("tbl_df", "tbl", "data.frame"))

# https://evalf20.classes.andrewheiss.com/example/rdd/

library(tidyverse)

# Fitted outcome at the cutoff (days == 0) from each side's linear model.
# These are the two end points of the vertical segment drawn below.
y1 <- predict(
  lm(y ~ days, data = filter(df, days < 0)),
  newdata = data.frame(days = 0)
)
y2 <- predict(
  lm(y ~ days, data = filter(df, days >= 0)),
  newdata = data.frame(days = 0)
)

# Discontinuity plot: jittered points coloured by `x`, one linear fit per side
# of the cutoff, a vertical rule at the cutoff, and a yellow segment joining
# the two fitted values at days == 0.
df %>%
  ggplot(aes(x = days, y = y, color = x)) +
  geom_point(size = 2, alpha = 0.5, position = position_jitter(seed = 42)) +
  geom_smooth(data = filter(df, days < 0), method = "lm") +
  geom_smooth(data = filter(df, days >= 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Days from cutoff", y = "Outcome") +
  # `guides(color = FALSE)` is deprecated in ggplot2; "none" is the supported
  # way to drop the colour legend.
  guides(color = "none") +
  annotate(
    "segment",
    x = 0, xend = 0, y = y1, yend = y2,
    color = "yellow", size = 3
  )

创建于2023年1月16日,使用reprex v2.0.2

相关问题