我的数据如下所示
structure(list(Country.Name = c("Afghanistan", "Africa Eastern and Southern",
"Africa Western and Central", "Albania", "Algeria", "American Samoa",
"Andorra", "Angola", "Antigua and Barbuda", "Arab World"), Indicator.Name.x = c("Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)",
"Employment in agriculture, female (% of female employment) (modeled ILO estimate)"
), X1991_f = c(76.37999725, 70.89825914, 57.87195138, 66.16000366,
14.88000011, NA, NA, 45.75, NA, 46.76332995), X1992_f = c(76.41000366,
70.90647329, 57.68949562, 65.09999847, 14.65999985, NA, NA, 46.56999969,
NA, 46.41667524), X1993_f = c(76.76000214, 70.7319986, 57.41803289,
64.56999969, 14.55000019, NA, NA, 47.09999847, NA, 44.36386763
), X1994_f = c(76.33999634, 70.56800973, 57.07966283, 63.97000122,
14.44999981, NA, NA, 47.45999908, NA, 44.80191031), X1995_f = c(76.65000153,
70.35511259, 56.72241777, 63.27000046, 14.31000042, NA, NA, 46.81999969,
NA, 43.64430037), X1996_f = c(77.05000305, 70.18675759, 56.29510159,
64.83999634, 14.02999973, NA, NA, 46.24000168, NA, 42.95114865
), X1997_f = c(77.41000366, 69.97559992, 55.80528904, 65.48999786,
14.01000023, NA, NA, 45.47000122, NA, 42.12963533), X1998_f = c(77.75,
69.8989892, 55.26462465, 65.01999664, 13.39000034, NA, NA, 44.41999817,
NA, 40.20414039), X1999_f = c(78.19000244, 69.8013891, 54.73848758,
64.75, 13.30000019, NA, NA, 45.24000168, NA, 38.70946622), X2000_f = c(78.79000092,
70.24647238, 54.24866548, 63.97999954, 13.55000019, NA, NA, 44.93000031,
NA, 41.05161104), X2001_f = c(79.63999939, 70.42984028, 53.48707555,
63.20000076, 13.03999996, NA, NA, 44.09999847, NA, 38.05426761
), X2002_f = c(78.72000122, 70.92044757, 52.72674308, 62.18999863,
12.34000015, NA, NA, 43.99000168, NA, 36.20345416), X2003_f = c(78.38999939,
70.90105446, 52.08667381, 61.06000137, 11.60999966, NA, NA, 43.97999954,
NA, 37.82208055), X2004_f = c(78.95999908, 70.67664189, 51.49707855,
60.34999847, 10.06999969, NA, NA, 44.22999954, NA, 38.9673068
), X2005_f = c(78.08999634, 70.79609724, 50.5431249, 59.65999985,
8.859999657, NA, NA, 46.08000183, NA, 38.32149263), X2006_f = c(77.63999939,
70.16321174, 49.64469383, 58.54000092, 7.489999771, NA, NA, 47.77999878,
NA, 36.74298129), X2007_f = c(76.37999725, 69.54450304, 48.60527577,
57.72999954, 6.269999981, NA, NA, 49.38000107, NA, 37.2889534
), X2008_f = c(76.16000366, 68.54660244, 47.37431999, 56.88999939,
5.329999924, NA, NA, 51.13999939, NA, 35.97835105), X2009_f = c(74.22000122,
68.0518831, 46.19847271, 57.22000122, 4.199999809, NA, NA, 52.75,
NA, 35.34189842), X2010_f = c(72.90000153, 67.76753275, 44.96248185,
53.09000015, 3.519999981, NA, NA, 55.47000122, NA, 33.55782243
), X2011_f = c(72.41000366, 66.98750257, 43.8503065, 54.36999893,
2.960000038, NA, NA, 58.00999832, NA, 32.65826301), X2012_f = c(70.94999695,
66.02737174, 42.36432472, 57, 3.170000076, NA, NA, 58.45000076,
NA, 30.48576338), X2013_f = c(69.98999786, 65.45106245, 41.00997613,
53.16999817, 3.319999933, NA, NA, 58.45999908, NA, 31.3212523
), X2014_f = c(69.18000031, 64.74486799, 39.422334, 49.88000107,
3.460000038, NA, NA, 58.20000076, NA, 31.1129401), X2015_f = c(68.59999847,
64.0600908, 37.89897833, 47.68000031, 3.569999933, NA, NA, 57.79999924,
NA, 29.60966539), X2016_f = c(67.55999756, 63.55588709, 37.23167103,
45.22999954, 3.700000048, NA, NA, 57.52000046, NA, 28.65057866
), X2017_f = c(66.22000122, 62.95115242, 36.39239843, 42.38999939,
3.839999914, NA, NA, 57.20000076, NA, 28.24277769), X2018_f = c(65.76999664,
62.45878849, 35.75188861, 42.27999878, 3.579999924, NA, NA, 57.06000137,
NA, 23.98984823), X2019_f = c(64.95999908, 61.93481888, 34.98255693,
41.63000107, 3.380000114, NA, NA, 56.77999878, NA, 23.03898223
), Indicator.Name.y = c("Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)",
"Employment in agriculture, male (% of male employment) (modeled ILO estimate)"
), X1991_m = c(61.11999893, 61.38082542, 60.42776603, 51.61000061,
26.45999908, NA, NA, 34.15000153, NA, 31.40304127), X1992_m = c(61.40000153,
61.57731927, 60.35758857, 52.63000107, 26.31999969, NA, NA, 33.54000092,
NA, 31.06940314), X1993_m = c(62.24000168, 61.6839476, 60.41184424,
52.13999939, 26.12999916, NA, NA, 33.63999939, NA, 30.46141617
), X1994_m = c(62.36999893, 61.68793227, 60.38606372, 51.59000015,
25.89999962, NA, NA, 33.02999878, NA, 30.04424893), X1995_m = c(62.15999985,
61.65570615, 60.32138756, 50.84000015, 25.63999939, NA, NA, 32.58000183,
NA, 29.61220427), X1996_m = c(62.54999924, 61.54173509, 60.11798216,
49, 25.36000061, NA, NA, 31.98999977, NA, 28.91751457), X1997_m = c(62.86000061,
61.66644396, 59.86985078, 48.72000122, 24.96999931, NA, NA, 31.84000015,
NA, 28.32475036), X1998_m = c(63.08000183, 61.65208625, 59.63380569,
47.93999863, 24.64999962, NA, NA, 32.06999969, NA, 27.85096791
), X1999_m = c(63.31000137, 61.56719624, 59.46312819, 46.31000137,
24.31999969, NA, NA, 30.65999985, NA, 27.42025206), X2000_m = c(63.65000153,
61.63792017, 59.1542363, 44.91999817, 23.70999908, NA, NA, 30.19000053,
NA, 27.1720503), X2001_m = c(64.01999664, 61.78336159, 58.77138786,
42.95999908, 23.07999992, NA, NA, 30.44000053, NA, 26.93498439
), X2002_m = c(61.97000122, 61.75729597, 58.04072823, 41.70000076,
23.21999931, NA, NA, 29.21999931, NA, 26.502578), X2003_m = c(61.15000153,
61.67732635, 57.43396689, 40.93999863, 23.36000061, NA, NA, 28.97999954,
NA, 26.1462127), X2004_m = c(60.50999832, 61.38274438, 56.65664598,
39.77999878, 21.80999947, NA, NA, 28.14999962, NA, 26.00187369
), X2005_m = c(59.29999924, 61.34758091, 56.26699531, 38.56000137,
20.18000031, NA, NA, 30.14999962, NA, 25.03849467), X2006_m = c(58.38999939,
60.77753162, 55.81997131, 37.43000031, 18.70999908, NA, NA, 32.63999939,
NA, 24.39160015), X2007_m = c(57.29000092, 60.37245552, 55.24539063,
36.68000031, 17.23999977, NA, NA, 35.02000046, NA, 23.4421701
), X2008_m = c(56.36000061, 60.08068911, 54.64176858, 35.75,
15.90999985, NA, NA, 37.56000137, NA, 22.77119995), X2009_m = c(53.88999939,
60.04838745, 53.96863359, 35.04999924, 14.61999989, NA, NA, 40.20000076,
NA, 21.59570526), X2010_m = c(51.38999939, 59.77404798, 53.1852058,
34.25999832, 13.35999966, NA, NA, 42.54000092, NA, 20.65910673
), X2011_m = c(50.20000076, 59.32684789, 52.3719792, 38.81000137,
12.30000019, NA, NA, 44.52000046, NA, 20.42663255), X2012_m = c(47.81000137,
58.86642424, 51.54752003, 37.59000015, 12.28999996, NA, NA, 44.09999847,
NA, 19.87213684), X2013_m = c(45.88000107, 58.75190065, 50.68928721,
37.56999969, 12.15999985, NA, NA, 43.99000168, NA, 19.51637588
), X2014_m = c(44.09000015, 58.25105258, 49.53175737, 36.75999832,
11.94999981, NA, NA, 44.15000153, NA, 19.30844549), X2015_m = c(42.29999924,
57.82885815, 48.63115767, 36.74000168, 11.77999973, NA, NA, 44.54000092,
NA, 18.8722441), X2016_m = c(40.65999985, 57.63116957, 48.50541695,
36.29000092, 11.56000042, NA, NA, 44.79999924, NA, 18.44252983
), X2017_m = c(38.36000061, 57.44003748, 47.97781438, 34.95999908,
11.35999966, NA, NA, 44.79999924, NA, 17.91710234), X2018_m = c(37.25999832,
57.10172448, 47.43563728, 33.72000122, 11.05000019, NA, NA, 44.75,
NA, 17.47135992), X2019_m = c(36.59999847, 56.98322298, 46.98550533,
32.70999908, 10.76000023, NA, NA, 44.65000153, NA, 16.91990401
)), row.names = c(NA, 10L), class = "data.frame")
以_f
结尾的变量表示该年女性在农业中的份额,而以_m
结尾的变量表示该年男性在农业中的份额。我如何找到以_f
和_m
结尾的相应变量的差异。
例如,1991年女性在农业中的份额由变量X1991_f
给出,而1991年男性在农业中的份额为X1991_m
。粗略地说,我可以通过减去这两个变量来找到差异,但鉴于我有1991年至2019年的跨国和时间序列数据,我如何有效地为所有数据年份做到这一点?
谢谢你。
2条答案
按热度按时间mwkjh3gx1#
你可以通过使用
grep
索引和简单的减法在R中实现这一点。我将缩短您的数据集,使其成为一个 * 最小 * 可重复的示例(
df_small
),仅使用三年(1991 - 1993):同时创建新列和相减的代码是:
如果你为女性(
idx_f
)和男性(idx_m
)创建索引,可能会更容易看到发生了什么:无论哪种方式,输出都是:
数据
vmjh9lq92#
您的数据当前是宽格式,但如果它是长格式,此操作会容易得多。因此,您可以首先使用
pivot_longer()
从dplyr
重新调整 Dataframe ,然后进行计算。它将返回如下内容:
请注意,这是从删除
Indicator.Name
列(您有两个)开始的,因为一旦我们重新塑造数据,此信息将分别吸收到f
和m
列中。此外,现在数据是长格式的,您可能还希望使用
lag()
或lead()
函数来查看跨年度差异(例如,在给定的国家内,农业中的男性/女性就业情况或他们之间的差异如何随时间变化)。这些文档是here。