我的csv文件包含重复样本,例如Mab1、Mab1rep、Mab2、Mab2rep等。数据框Data如下所示:
# Example data set for the plots below: a 102-row data.frame with four columns
# (the literal looks like dput() output).
#   Samples    - character sample label; replicate measurements carry a "rep"
#                or "_rep" suffix (e.g. "Mab1" / "Mab1rep",
#                "PosCtrl" / "PosCtrl_rep").
#   Blue       - numeric reading for the blue channel.
#   Green      - numeric reading for the green channel.
#   Green_norm - numeric, two decimals; appears to be Green / Blue
#                (e.g. 204.04 / 128.40 ~ 1.59) -- TODO confirm with the author.
Data <- structure(list(Samples = c("Isotype_L", "Isotype_L", "Isotype_L",
"Isotype_L", "Mab1", "Mab1", "Mab1", "Mab1", "Mab1-GL", "Mab1-GL",
"Mab1-GL", "Mab1-GL", "Mab2", "Mab2", "Mab2", "Mab2", "Mab2-GL",
"Mab2-GL", "Mab2-GL", "Mab2-GL", "Mab3", "Mab3", "Mab3", "Mab3",
"Mab4", "Mab4", "Mab4", "Mab4", "Mab4", "Mab5", "Mab5", "Mab5",
"Mab5", "Mab5", "Isotype_K", "Isotype_K", "Isotype_K", "Isotype_K",
"Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep", "Isotype_Lrep",
"Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1rep", "Mab1-GLrep",
"Mab1-GLrep", "Mab1-GLrep", "Mab1-GLrep", "Mab2rep", "Mab2rep",
"Mab2rep", "Mab2rep", "Mab2-GLrep", "Mab2-GLrep", "Mab2-GLrep",
"Mab2-GLrep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab3rep", "Mab4rep",
"Mab4rep", "Mab4rep", "Mab4rep", "Mab4rep", "Mab5rep", "Mab5rep",
"Mab5rep", "Mab5rep", "Mab5rep", "Isotype_Krep", "Isotype_Krep",
"Isotype_Krep", "Isotype_Krep", "PosCtrl", "PosCtrl", "PosCtrl",
"PosCtrl", "PosCtrl", "neg-AF488", "neg-AF488", "neg-AF488",
"neg-AF488", "Negative", "Negative", "Negative", "Negative",
"PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "PosCtrl_rep", "neg-AF488rep",
"neg-AF488rep", "neg-AF488rep", "neg-AF488rep", "Negative_rep",
"Negative_rep", "Negative_rep", "Negative_rep"), Blue = c(128.3952818,
120.2831546, 143.243713, 132.0577827, 133.8880534, 133.7664632,
121.0706891, 157.5932623, 182.4168577, 160.3366789, 205.4662033,
194.5710452, 136.0504487, 130.899206, 158.7230946, 146.368408,
152.2359201, 135.2182368, 142.0670308, 117.5533153, 135.8317231,
147.2705529, 130.3724567, 137.7607945, 135.0915241, 114.6307573,
153.3744009, 148.6203231, 141.6522212, 142.7500602, 129.3132835,
133.323963, 161.4505614, 120.3986388, 149.467766, 131.8404767,
121.1891517, 134.1152953, 142.9095762, 148.1782023, 133.1172244,
132.8860874, 124.8857092, 140.9295437, 122.7443303, 142.281986,
148.2327674, 138.7267188, 147.7389215, 157.2358721, 153.455753,
135.239042, 168.5716308, 122.357492, 141.6833326, 125.6991336,
121.3251682, 142.712414, 174.2987679, 140.9524518, 121.1017373,
154.801132, 126.8055734, 145.4754619, 168.1953102, 121.5520058,
137.4914411, 142.5554603, 147.9192906, 123.1908202, 134.2369485,
132.6270733, 143.3067567, 120.2250493, 127.5301465, 142.1151132,
125.718732, 117.2397291, 134.7169574, 120.9030571, 138.0262017,
121.5363059, 140.1157374, 171.9441906, 179.801995, 157.7747676,
135.5647523, 130.947343, 124.0994119, 117.3040363, 120.2912237,
128.9369029, 129.2967454, 134.7686437, 127.5407896, 155.7879164,
134.9068068, 121.4993647, 146.2323789, 131.6257992, 161.208799,
137.8464021), Green = c(204.0416907, 179.8289799, 192.7909809,
185.1904749, 119.5289134, 116.1968717, 119.8961343, 119.3418334,
114.7639073, 113.7169804, 118.3994388, 118.0875025, 120.7343683,
119.6826046, 121.079657, 124.4646777, 118.125646, 114.1900465,
114.0732686, 110.5228171, 115.0555818, 118.7761173, 113.2995208,
118.5396075, 167.6058496, 149.1461499, 189.7257013, 207.9481807,
177.2098519, 118.5133042, 118.6931648, 119.4754029, 128.9372642,
129.7043945, 112.037337, 111.9090535, 110.2099861, 112.2431433,
191.4316539, 201.5396396, 190.3129216, 192.7112734, 114.2036743,
115.6031688, 115.5844771, 115.7509866, 118.9890215, 112.9275697,
115.6021348, 119.0952462, 117.3730964, 113.6875097, 117.9319529,
114.2584918, 182.7833727, 111.9750247, 114.6643268, 117.7445263,
119.7687462, 113.3304581, 146.4097633, 114.3161156, 111.3511068,
200.9120144, 218.8782048, 169.1520322, 161.2219501, 266.5332884,
117.3344686, 117.3277836, 118.1452713, 115.3104536, 127.5856625,
112.5214363, 116.5449408, 115.1459536, 111.7753407, 349.2590405,
385.2193187, 439.0155097, 490.8051766, 394.1068064, 107.9149422,
108.005748, 108.1659999, 109.7366457, 107.8067543, 108.471598,
108.4746003, 108.6726188, 330.5756935, 329.6602842, 243.6285135,
266.0160698, 107.21539, 108.4953225, 104.6257189, 108.7797861,
108.3317481, 107.2107311, 107.6584237, 106.8200559), Green_norm = c(1.59,
1.5, 1.35, 1.4, 0.89, 0.87, 0.99, 0.76, 0.63, 0.71, 0.58, 0.61,
0.89, 0.91, 0.76, 0.85, 0.78, 0.84, 0.8, 0.94, 0.85, 0.81, 0.87,
0.86, 1.24, 1.3, 1.24, 1.4, 1.25, 0.83, 0.92, 0.9, 0.8, 1.08,
0.75, 0.85, 0.91, 0.84, 1.34, 1.36, 1.43, 1.45, 0.91, 0.82, 0.94,
0.81, 0.8, 0.81, 0.78, 0.76, 0.76, 0.84, 0.7, 0.93, 1.29, 0.89,
0.95, 0.83, 0.69, 0.8, 1.21, 0.74, 0.88, 1.38, 1.3, 1.39, 1.17,
1.87, 0.79, 0.95, 0.88, 0.87, 0.89, 0.94, 0.91, 0.81, 0.89, 2.98,
2.86, 3.63, 3.56, 3.24, 0.77, 0.63, 0.6, 0.7, 0.8, 0.83, 0.87,
0.93, 2.75, 2.56, 1.88, 1.97, 0.84, 0.7, 0.78, 0.9, 0.74, 0.81,
0.67, 0.77)), class = "data.frame", row.names = c(NA, -102L))
我使用ggplot2库绘制箱线图:首先,我想看看各样本的分布,并了解重复样本之间的差异。
# Box plot of Green_norm per sample. The x axis is ordered by decreasing
# Green_norm: reorder() sorts the Samples levels by a per-level summary
# (the mean by default) of the negated value.
# Requires ggplot2 to be attached; ggpubr only needs to be installed.
ggplot(
  Data,
  aes(x = reorder(Samples, -Green_norm), y = Green_norm, fill = Samples)
) +
  geom_boxplot(alpha = 0.5) +
  # Overlay the individual measurements, coloured by sample.
  geom_point(aes(colour = Samples)) +
  theme_bw() +
  # rotate_x_text() is exported by ggpubr, not ggplot2; qualifying the call
  # avoids a "could not find function" error when ggpubr is not attached.
  ggpubr::rotate_x_text(angle = 45)
我得到的图如下:
其次,我想按样本类型对重复样本进行分组,例如Mab1及其重复样本Mab1rep等。
1条答案
按热度按时间ljsrvy3e1#
您可以使用正则表达式替换来删除Samples列中出现的任何“rep”或“_rep”,然后使用现有的绘图代码。我没有`rotate_x_text`函数,所以我通过`theme`执行等效操作。我还修改了绘图代码,以使用不同的列名,而不是覆盖Samples。