R语言:在ggplot2中将重复样品按样品分组

rm5edbpk  于 2023-01-18  发布在  其他
关注(0)|答案(1)|浏览(169)

我的CSV文件中包含重复样品(replicates),例如Mab1、Mab1rep、Mab2、Mab2rep等。数据框Data如下所示:

# Example data set: a base data.frame with 102 rows and four columns.
#   Samples    - character sample label; replicate runs reuse the base label
#                with a "rep" or "_rep" suffix (e.g. "Mab1" / "Mab1rep",
#                "PosCtrl" / "PosCtrl_rep").
#   Blue       - numeric measurement.
#   Green      - numeric measurement.
#   Green_norm - numeric, two decimals.
#                NOTE(review): appears to be Green / Blue rounded to two
#                decimals (e.g. 204.04 / 128.40 ~ 1.59) - confirm with author.
Data <- structure(list(Samples = c("Isotype_L", "Isotype_L", "Isotype_L", 
"Isotype_L", "Mab1", "Mab1", "Mab1", "Mab1", "Mab1-GL", "Mab1-GL", 
"Mab1-GL", "Mab1-GL", "Mab2", "Mab2", "Mab2", "Mab2", "Mab2-GL", 
"Mab2-GL", "Mab2-GL", "Mab2-GL", "Mab3", "Mab3", "Mab3", "Mab3", 
"Mab4", "Mab4", "Mab4", "Mab4", "Mab4", "Mab5", "Mab5", "Mab5", 
"Mab5", "Mab5", "Isotype_K", "Isotype_K", "Isotype_K", "Isotype_K", 
"Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep", 
"Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1-GLrep", 
"Mab1-GLrep", "Mab1-GLrep", "Mab1-GLrep", "Mab2rep", "Mab2rep", 
"Mab2rep", "Mab2rep", "Mab2-GLrep", "Mab2-GLrep", "Mab2-GLrep", 
"Mab2-GLrep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab4rep", 
"Mab4rep", "Mab4rep", "Mab4rep", "Mab4rep", "Mab5rep", "Mab5rep", 
"Mab5rep", "Mab5rep", "Mab5rep", "Isotype_Krep", "Isotype_Krep", 
"Isotype_Krep", "Isotype_Krep", "PosCtrl", "PosCtrl", "PosCtrl", 
"PosCtrl", "PosCtrl", "neg-AF488", "neg-AF488", "neg-AF488", 
"neg-AF488", "Negative", "Negative", "Negative", "Negative", 
"PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "neg-AF488rep", 
"neg-AF488rep", "neg-AF488rep", "neg-AF488rep", "Negative_rep", 
"Negative_rep", "Negative_rep", "Negative_rep"), Blue = c(128.3952818, 
120.2831546, 143.243713, 132.0577827, 133.8880534, 133.7664632, 
121.0706891, 157.5932623, 182.4168577, 160.3366789, 205.4662033, 
194.5710452, 136.0504487, 130.899206, 158.7230946, 146.368408, 
152.2359201, 135.2182368, 142.0670308, 117.5533153, 135.8317231, 
147.2705529, 130.3724567, 137.7607945, 135.0915241, 114.6307573, 
153.3744009, 148.6203231, 141.6522212, 142.7500602, 129.3132835, 
133.323963, 161.4505614, 120.3986388, 149.467766, 131.8404767, 
121.1891517, 134.1152953, 142.9095762, 148.1782023, 133.1172244, 
132.8860874, 124.8857092, 140.9295437, 122.7443303, 142.281986, 
148.2327674, 138.7267188, 147.7389215, 157.2358721, 153.455753, 
135.239042, 168.5716308, 122.357492, 141.6833326, 125.6991336, 
121.3251682, 142.712414, 174.2987679, 140.9524518, 121.1017373, 
154.801132, 126.8055734, 145.4754619, 168.1953102, 121.5520058, 
137.4914411, 142.5554603, 147.9192906, 123.1908202, 134.2369485, 
132.6270733, 143.3067567, 120.2250493, 127.5301465, 142.1151132, 
125.718732, 117.2397291, 134.7169574, 120.9030571, 138.0262017, 
121.5363059, 140.1157374, 171.9441906, 179.801995, 157.7747676, 
135.5647523, 130.947343, 124.0994119, 117.3040363, 120.2912237, 
128.9369029, 129.2967454, 134.7686437, 127.5407896, 155.7879164, 
134.9068068, 121.4993647, 146.2323789, 131.6257992, 161.208799, 
137.8464021), Green = c(204.0416907, 179.8289799, 192.7909809, 
185.1904749, 119.5289134, 116.1968717, 119.8961343, 119.3418334, 
114.7639073, 113.7169804, 118.3994388, 118.0875025, 120.7343683, 
119.6826046, 121.079657, 124.4646777, 118.125646, 114.1900465, 
114.0732686, 110.5228171, 115.0555818, 118.7761173, 113.2995208, 
118.5396075, 167.6058496, 149.1461499, 189.7257013, 207.9481807, 
177.2098519, 118.5133042, 118.6931648, 119.4754029, 128.9372642, 
129.7043945, 112.037337, 111.9090535, 110.2099861, 112.2431433, 
191.4316539, 201.5396396, 190.3129216, 192.7112734, 114.2036743, 
115.6031688, 115.5844771, 115.7509866, 118.9890215, 112.9275697, 
115.6021348, 119.0952462, 117.3730964, 113.6875097, 117.9319529, 
114.2584918, 182.7833727, 111.9750247, 114.6643268, 117.7445263, 
119.7687462, 113.3304581, 146.4097633, 114.3161156, 111.3511068, 
200.9120144, 218.8782048, 169.1520322, 161.2219501, 266.5332884, 
117.3344686, 117.3277836, 118.1452713, 115.3104536, 127.5856625, 
112.5214363, 116.5449408, 115.1459536, 111.7753407, 349.2590405, 
385.2193187, 439.0155097, 490.8051766, 394.1068064, 107.9149422, 
108.005748, 108.1659999, 109.7366457, 107.8067543, 108.471598, 
108.4746003, 108.6726188, 330.5756935, 329.6602842, 243.6285135, 
266.0160698, 107.21539, 108.4953225, 104.6257189, 108.7797861, 
108.3317481, 107.2107311, 107.6584237, 106.8200559), Green_norm = c(1.59, 
1.5, 1.35, 1.4, 0.89, 0.87, 0.99, 0.76, 0.63, 0.71, 0.58, 0.61, 
0.89, 0.91, 0.76, 0.85, 0.78, 0.84, 0.8, 0.94, 0.85, 0.81, 0.87, 
0.86, 1.24, 1.3, 1.24, 1.4, 1.25, 0.83, 0.92, 0.9, 0.8, 1.08, 
0.75, 0.85, 0.91, 0.84, 1.34, 1.36, 1.43, 1.45, 0.91, 0.82, 0.94, 
0.81, 0.8, 0.81, 0.78, 0.76, 0.76, 0.84, 0.7, 0.93, 1.29, 0.89, 
0.95, 0.83, 0.69, 0.8, 1.21, 0.74, 0.88, 1.38, 1.3, 1.39, 1.17, 
1.87, 0.79, 0.95, 0.88, 0.87, 0.89, 0.94, 0.91, 0.81, 0.89, 2.98, 
2.86, 3.63, 3.56, 3.24, 0.77, 0.63, 0.6, 0.7, 0.8, 0.83, 0.87, 
0.93, 2.75, 2.56, 1.88, 1.97, 0.84, 0.7, 0.78, 0.9, 0.74, 0.81, 
0.67, 0.77)), class = "data.frame", row.names = c(NA, -102L))

我使用ggplot2库绘制箱线图:首先,我想看看样本的分布,并了解重复之间的差异。

# Box plot of Green_norm for every sample label, with the individual
# observations overlaid as points. The x axis is ordered via
# reorder(Samples, -Green_norm), i.e. by decreasing per-label mean (reorder's
# default summary function is mean).
# NOTE(review): rotate_x_text() is not defined in this snippet; presumably it
# is the ggpubr helper - confirm that ggpubr is attached before running.
ggplot(
  Data,
  aes(x = reorder(Samples, -Green_norm), y = Green_norm, fill = Samples)
) +
  geom_boxplot(alpha = 0.5) +
  geom_point(aes(colour = Samples)) +
  theme_bw() +
  rotate_x_text(angle = 45)

我得到的图如下:

其次,我想按样本类型对重复样本进行分组,例如Mab1及其重复样本Mab1rep等。

ljsrvy3e

ljsrvy3e1#

您可以使用正则表达式替换,删除Samples列中各个值末尾的“rep”或“_rep”后缀,然后沿用现有的绘图代码。我没有rotate_x_text函数,所以我通过theme实现了等效的效果。我还修改了绘图代码,让它使用一个新的列名(Samples_grouped),而不是覆盖原来的Samples列。

library(tidyverse)

# Derive a grouping label for each row: strip a trailing "rep" (together with
# any underscores directly before it) so that a replicate such as "Mab1rep" or
# "PosCtrl_rep" shares its label with the base sample. The result goes into a
# new column, leaving the original Samples column untouched.
data_new <- mutate(Data, Samples_grouped = gsub("_*rep$", "", Samples))

# Box plot of Green_norm per grouped sample, ordered by decreasing mean, with
# the individual observations overlaid; x-axis labels are tilted 45 degrees.
ggplot(
  data_new,
  aes(
    x = reorder(Samples_grouped, -Green_norm),
    y = Green_norm,
    fill = Samples_grouped
  )
) +
  geom_boxplot(alpha = 0.5) +
  geom_point(aes(colour = Samples_grouped)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

相关问题