pandas:在 Python 中根据数值所在的区间(pd.cut 分箱)为其分配概率

zvokhttg  于 2022-12-02  发布在  Python
关注(0)|答案(1)|浏览(167)

I'm looking at a data set of scores.
I want to know the probability of each score based on the bin the score falls in using pd.cut
How can I take a value and assign it a probability based on the outputted table?
Code as follows

import pandas as pd

# Sample scores that will be bucketed into equal-width bins.
data = pd.DataFrame({'scores':[168.0, 44.0, 352.0, 128.0, 268.0, 228.0, 160.0, 376.0, 304.0, 124.0, 360.0, 36.0, 224.0, 176.0, 40.0, 28.0, 264.0, 292.0, 228.0, 80.0, 216.0, 132.0, 88.0, 220.0, 284.0, 308.0, 256.0, 360.0, 364.0, 128.0, 268.0, 72.0, 100.0, 320.0, 224.0, 300.0, 232.0, 316.0, 196.0, 248.0, 24.0, 396.0, 8.0, 248.0, 244.0, 392.0, 240.0, 28.0, 260.0, 220.0, 120.0, 56.0, 232.0, 216.0, 228.0, 232.0, 332.0, 280.0, 148.0, 84.0, 284.0, 268.0, 176.0, 324.0, 52.0, 112.0, 344.0, 296.0, 164.0, 28.0, 304.0, 344.0, 232.0, 340.0, 324.0, 248.0, 232.0, 400.0, 396.0, 36.0, 52.0, 204.0, 292.0, 96.0, 68.0, 392.0, 260.0, 224.0, 236.0, 248.0, 316.0, 292.0, 212.0, 276.0, 304.0, 124.0, 216.0, 48.0, 64.0, 228.0]})

# Assign every score to one of 20 equal-width, left-closed bins.
binnedScores = pd.cut(data['scores'], bins=20, include_lowest=True, ordered=True, precision=4, right=False)

# Count the scores per bin; sort=False keeps the bins in ascending interval order.
binCounts = binnedScores.value_counts(sort=False)

# Build the table explicitly instead of calling reset_index(): the column names
# produced by value_counts().reset_index() changed in pandas 2.0 (the counts
# column became 'count' and the interval column took the name 'scores'), which
# makes `frequencyTable['scores'] / len(data)` fail on newer versions.
# Columns: 'index' = bin interval, 'scores' = number of scores in that bin.
frequencyTable = pd.DataFrame({'index': binCounts.index, 'scores': binCounts.to_numpy()})

# Relative frequency of each bin (fraction of all scores that fall in it).
frequencyTable['probability'] = frequencyTable['scores'] / len(data)
print(frequencyTable)

Output as follows

               index  scores  probability
0        [8.0, 27.6)       2         0.02
1       [27.6, 47.2)       7         0.07
2       [47.2, 66.8)       5         0.05
3       [66.8, 86.4)       4         0.04
4      [86.4, 106.0)       3         0.03
5     [106.0, 125.6)       4         0.04
6     [125.6, 145.2)       3         0.03
7     [145.2, 164.8)       3         0.03
8     [164.8, 184.4)       3         0.03
9     [184.4, 204.0)       1         0.01
10    [204.0, 223.6)       7         0.07
11    [223.6, 243.2)      14         0.14
12    [243.2, 262.8)       8         0.08
13    [262.8, 282.4)       6         0.06
14    [282.4, 302.0)       7         0.07
15    [302.0, 321.6)       7         0.07
16    [321.6, 341.2)       4         0.04
17    [341.2, 360.8)       5         0.05
18    [360.8, 380.4)       2         0.02
19  [380.4, 400.392)       5         0.05

I'd like to be able to take input = 265 and return 6%

l7wslrjt

l7wslrjt1#

frequencyTable 是一个表,其中第一列是区间(Interval),第三列是概率(0 到 1 之间的小数,而不是百分比)。因此,要得到你想要的结果,可以遍历这个表,找到包含输入值(v=265)的区间所在的行,然后取该行第三列的值并乘以 100 转换为百分比。例如:

# Look up the percentage of scores that share a bin with the value `v`.
v = 265
p = -1  # sentinel: stays -1 when no bin contains v (e.g. v is outside the data range)

# Each row is (bin interval, count, probability). Unpacking the tuples avoids
# shadowing the builtin `bin` and avoids building an object-dtype ndarray.
for interval, _count, probability in frequencyTable.itertuples(index=False, name=None):
    # pandas.Interval supports `in`, honouring the closed/open side of the bin.
    if v in interval:
        p = probability * 100  # convert the fraction to a percentage
        break

print(p, '%')

结果:

6.0 %

相关问题