python 如何使用Plotly中的直方图将所有离群值合并到一个bin中?

t5zmwmid  于 2023-01-19  发布在  Python
关注(0)|答案(2)|浏览(145)

所以问题是:

    • 是否可以在Plotly中绘制直方图,将大于某个阈值的所有值分组到一个条柱中?

所需输出:

但是使用标准的plotly Histogram类,我只能得到以下输出:

import pandas as pd

from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# Switch plotly into notebook mode before anything is drawn.
init_notebook_mode()

# Sample data written as (value, repetitions) pairs and expanded into a flat
# column. The last three pairs are the outliers that should end up together
# in a single "> 10" bin.
test_df = pd.DataFrame({
    'values': [
        value
        for value, repetitions in (
            (1, 10), (2, 9),
            (3.1, 4), (3.6, 4),
            (4, 7), (5, 6), (6, 5), (7, 4), (8, 3),
            (9, 2), (10, 1),
            (111.2, 2), (222.3, 2), (333.4, 1),
        )
        for _ in range(repetitions)
    ],
})

# Fixed unit-width bins from 0 to 11 with automatic binning switched off.
data = [
    go.Histogram(
        x=test_df['values'],
        xbins={'start': 0, 'end': 11, 'size': 1},
        autobinx=False,
    ),
]

layout = go.Layout(title='values')
fig = go.Figure(layout=layout, data=data)

iplot(fig, filename='basic histogram')

kpbwa7wx

kpbwa7wx1#

花了一些时间之后,我自己找到了一个解决方案:使用 numpy.histogram 配合 Plotly 的 Bar 图表。
把它留在这里,以防有人遇到同样的问题。

def plot_bar_with_outliers(series, name, end):
    """Plot unit-width bins of ``series`` as a bar chart with one overflow bar.

    Bin edges run in steps of 1 from the floor of the smallest value up to
    ``end``. One extra edge is appended so that everything from ``end``
    upwards is counted in a single final bar labelled ``'> <end>'``.

    Args:
        series: Numeric data exposing ``.min()`` and ``.max()`` (the caller
            in this file passes a pandas Series).
        name: Title shown above the chart.
        end: Integer threshold; it is passed to ``range`` so it must be an
            int.
    """
    # Imported locally so the function works on its own: it needs
    # np.histogram, and no numpy import precedes this definition.
    import numpy as np

    size = 1
    # Floor instead of truncating toward zero: with int(), a minimum such as
    # -1.5 would yield a first edge of -1 and the smallest values would fall
    # outside every bin.
    start = int(np.floor(series.min()))

    # Regular unit-width edges, then one closing edge for the overflow bar:
    # the largest value when outliers exist, otherwise one more unit step.
    regular_edges = list(range(start, end + size, size))
    largest_value = series.max()
    overflow_edge = largest_value if largest_value > end else end + size
    counts, edges = np.histogram(series, bins=regular_edges + [overflow_edge])

    # One label per bar: 'low - high' for regular bins, '> low' for the bar
    # whose upper edge lies beyond ``end``.
    labels = [
        '{} - {}'.format(low, high) if high <= end else '> {}'.format(low)
        for low, high in zip(edges[:-1], edges[1:])
    ]

    # Plotting the graph
    data = [go.Bar(x=labels,
                   y=counts)]

    layout = go.Layout(
        title=name
    )
    fig = go.Figure(data=data, layout=layout)

    iplot(fig, filename='basic histogram')

# Chart the question's sample column with the threshold set to 11.
plot_bar_with_outliers(test_df['values'], 'values', end=11)

rjee0c15

rjee0c152#

上述选项的替代方案如下:

import numpy as np

# Initialize the values that you want in the histogram.
values = [7,8,8,8,9,10,10,11,12,13,14]

# Initialize the maximum x-axis value that you want.
maximum_value = 11

# Plot the histogram.
fig = go.Figure()

fig.add_trace(
    go.Histogram(
        # Cap every value at maximum_value in one vectorized call, so all
        # larger values are counted in the bar at maximum_value.
        x=np.minimum(values, maximum_value),
        xbins={"size": 1},
    )
)

fig.show()

Link to Image

相关问题