pandas 如何在FacetGrid中设置多个直方图

ovfsdjhp  于 2023-03-06  发布在  其他
关注(0)|答案(1)|浏览(84)

我有一个95列的 Dataframe ,其中是不同测量的Max()、Min()和Avg()值,我想在3列32行的FacetGrid上绘制它们的直方图,其中第1列是最大值,第2列是平均值,第3列是最小值,行是测量类型。
我目前的代码如下:

# Original attempt from the question: draw one histogram per numeric column
# onto a fixed grid of 32 rows x 3 columns of axes.
fig, axes = plt.subplots(nrows=32, ncols=3, figsize=(20, 96))
columnas_numeric = df_agg_new.select_dtypes(include=['float64', 'int64']).columns
columnas_numeric = columnas_numeric.drop('season')

# NOTE(review): `columns` is not defined anywhere in this snippet; presumably
# `columnas_numeric` was intended -- confirm.
# The grid row is computed as int(i/3) from the position of the column in the
# list. That is only correct when every measurement contributes exactly three
# consecutive columns; a measurement with fewer columns shifts all later
# histograms onto the wrong rows.
for i, colum in enumerate(columns):
    if colum.endswith('Max'):
        # '...Max' columns go to grid column 0.
        sns.histplot(
            data     = df_agg_new,
            x        = colum,
            stat     = "count",
            kde      = True,
            line_kws = {'linewidth': 2},
            alpha    = 0.3,
            ax       = axes[int(i/3)][0]
        )
        axes[int(i/3)][0].set_title(colum, fontsize = 7, fontweight = "bold")
        axes[int(i/3)][0].tick_params(labelsize = 6)
        axes[int(i/3)][0].set_xlabel("")
    elif colum.endswith('Avg'):
        # '...Avg' columns go to grid column 1.
        sns.histplot(
            data     = df_agg_new,
            x        = colum,
            stat     = "count",
            kde      = True,
            line_kws = {'linewidth': 2},
            alpha    = 0.3,
            ax       = axes[int(i/3)][1]
        )
        axes[int(i/3)][1].set_title(colum, fontsize = 7, fontweight = "bold")
        axes[int(i/3)][1].tick_params(labelsize = 6)
        axes[int(i/3)][1].set_xlabel("")
    else:
        # Every remaining column (neither '...Max' nor '...Avg') goes to grid
        # column 2; there is no explicit check for a 'Min' suffix.
        sns.histplot(
            data     = df_agg_new,
            x        = colum,
            stat     = "count",
            kde      = True,
            line_kws = {'linewidth': 2},
            alpha    = 0.3,
            ax       = axes[int(i/3)][2]
        )
        axes[int(i/3)][2].set_title(colum, fontsize = 7, fontweight = "bold")
        axes[int(i/3)][2].tick_params(labelsize = 6)
        axes[int(i/3)][2].set_xlabel("")
    
    
fig.tight_layout()
# Shrink the top of the subplot area so the figure title drawn below has room.
plt.subplots_adjust(top = 0.97)
fig.suptitle('Distribution plots', fontsize = 10, fontweight = "bold");

但它不能正常工作,因为某些测量值的直方图被画到了其他行。
这是我的列名列表:

Index(['Var1_phase2_Avg', 'Var1_phase2_Max',
       'Var1_phase2_Min', 'Var1_phase3_Avg',
       'Var1_phase3_Max', 'Var1_phase3_Min',
       'Var1_phase1_Avg', 'Var1_phase1_Max',
       'Var1_phase1_Min', 'Var1_phase4_Avg',
       'Var1_phase4_Max', 'Var1_phase4_Min',
       'Var2_phase2_Avg', 'Var2_phase2_Max',
       'Var2_phase3_Avg', 'Var2_phase3_Max',
       'Var2_phase1_Avg', 'Var2_phase1_Max',
       'Var2_phase4_Avg', 'Var2_phase4_Max',
       'Var3_phase2_Avg', 'Var3_phase2_Max',
       'Var3_phase2_Min', 'Var3_phase3_Avg',
       'Var3_phase3_Max', 'Var3_phase3_Min',
       'Var3_phase1_Avg', 'Var3_phase1_Max',
       'Var3_phase1_Min', 'Var3_phase4_Avg',
       'Var3_phase4_Max', 'Var3_phase4_Min', 
       'Var4_phase2_Avg', 'Var4_phase2_Max', 
       'Var4_phase3_Avg', 'Var4_phase3_Max', 
       'Var4_phase1_Avg', 'Var4_phase1_Max', 
       'Var4_phase4_Avg', 'Var4_phase4_Max', 
       'Var5_phase2_Avg', 'Var5_phase2_Max',
       'Var5_phase2_Min', 'Var5_phase3_Avg', 
       'Var5_phase3_Max', 'Var5_phase3_Min', 
       'Var5_phase1_Avg', 'Var5_phase1_Max', 
       'Var5_phase1_Min', 'Var5_phase4_Avg', 
       'Var5_phase4_Max', 'Var5_phase4_Min', 
       'Var6_phase2_Avg', 'Var6_phase2_Max', 
       'Var6_phase2_Min', 'Var6_phase3_Avg', 
       'Var6_phase3_Max', 'Var6_phase1_Avg', 
       'Var6_phase1_Max', 'Var7_phase2_Avg', 
       'Var7_phase2_Max', 'Var7_phase2_Min',
       'Var7_phase3_Avg', 'Var7_phase3_Max',
       'Var7_phase3_Min', 'Var7_phase1_Avg', 
       'Var7_phase1_Max', 'Var7_phase1_Min', 
       'Var7_phase4_Avg', 'Var7_phase4_Max', 
       'Var7_phase4_Min', 'Var8_phase2_Avg', 
       'Var8_phase2_Max', 'Var8_phase2_Min', 
       'Var8_phase3_Avg', 'Var8_phase3_Max', 
       'Var8_phase3_Min', 'Var8_phase1_Avg', 
       'Var8_phase1_Max', 'Var8_phase1_Min',
       'Var8_phase4_Avg', 'Var8_phase4_Max', 
       'Var8_phase4_Min'],
      dtype='object')

这就是我希望得到的排列方式(每个位置显示对应列的直方图):

'Var1_phase2_Avg', 'Var1_phase2_Max', 'Var1_phase2_Min', 
       'Var1_phase3_Avg', 'Var1_phase3_Max', 'Var1_phase3_Min',
       'Var1_phase1_Avg', 'Var1_phase1_Max', 'Var1_phase1_Min',
       'Var1_phase4_Avg', 'Var1_phase4_Max', 'Var1_phase4_Min',
       
       'Var2_phase2_Avg', 'Var2_phase2_Max',
       'Var2_phase3_Avg', 'Var2_phase3_Max',
       'Var2_phase1_Avg', 'Var2_phase1_Max',
       'Var2_phase4_Avg', 'Var2_phase4_Max',

       'Var3_phase2_Avg', 'Var3_phase2_Max', 'Var3_phase2_Min',
       'Var3_phase3_Avg', 'Var3_phase3_Max', 'Var3_phase3_Min',
       'Var3_phase1_Avg', 'Var3_phase1_Max', 'Var3_phase1_Min',
       'Var3_phase4_Avg', 'Var3_phase4_Max', 'Var3_phase4_Min', 
       
       'Var4_phase2_Avg', 'Var4_phase2_Max', 
       'Var4_phase3_Avg', 'Var4_phase3_Max', 
       'Var4_phase1_Avg', 'Var4_phase1_Max', 
       'Var4_phase4_Avg', 'Var4_phase4_Max', 

       'Var5_phase2_Avg', 'Var5_phase2_Max', 'Var5_phase2_Min', 
       'Var5_phase3_Avg', 'Var5_phase3_Max', 'Var5_phase3_Min', 
       'Var5_phase1_Avg', 'Var5_phase1_Max', 'Var5_phase1_Min',
       'Var5_phase4_Avg', 'Var5_phase4_Max', 'Var5_phase4_Min', 
       
       'Var6_phase2_Avg', 'Var6_phase2_Max', 'Var6_phase2_Min',
       'Var6_phase3_Avg', 'Var6_phase3_Max',
       'Var6_phase1_Avg', 'Var6_phase1_Max',

       'Var7_phase2_Avg', 'Var7_phase2_Max', 'Var7_phase2_Min',
       'Var7_phase3_Avg', 'Var7_phase3_Max', 'Var7_phase3_Min',
       'Var7_phase1_Avg', 'Var7_phase1_Max', 'Var7_phase1_Min',
       'Var7_phase4_Avg', 'Var7_phase4_Max', 'Var7_phase4_Min',

       'Var8_phase2_Avg', 'Var8_phase2_Max', 'Var8_phase2_Min', 
       'Var8_phase3_Avg', 'Var8_phase3_Max', 'Var8_phase3_Min',
       'Var8_phase1_Avg', 'Var8_phase1_Max', 'Var8_phase1_Min',
       'Var8_phase4_Avg', 'Var8_phase4_Max', 'Var8_phase4_Min'

每列表示平均值、最大值和最小值,每一行表示一天中的每个阶段以及以后的每个不同测量。

mctunoxg

mctunoxg1#

使用axes[int(i/3)][0]时假设列列表以3个为一组,但实际情况并非如此。
根据列的命名方式,您可以:

  • 创建不带后缀的名称列表(例如'Var1_phase2'对应'Var1_phase2_Avg')
  • 可选地对列表进行排序
  • 使用名称的数量计算nrows=len(filas)
  • 遍历行名称,并检查是否存在具有该名称和其中一个后缀的列
  • 如果列存在,则绘制图
  • 如果该列不存在,请删除空图

另外,您可以让各子图共享x轴和/或y轴,这样更便于比较。对应的参数是plt.subplots(..., sharex=True, sharey=True)。共享坐标轴还会省略重复的刻度标签,从而节省一些空间(但在行数很多时,您可能并不希望省略;如果是这样,可以通过axes[i, j].tick_params(..., labelbottom=True)重新显示它们)。

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Collect the labels of the float64/int64 columns, then exclude 'season' so it
# is not plotted (Index.drop assumes 'season' is among the numeric columns).
numeric_frame = df_agg_new.select_dtypes(include=['float64', 'int64'])
columnas_numeric = numeric_frame.columns.drop('season')
# columnas_numeric = ['Var1_phase2_Avg', 'Var1_phase2_Max', 'Var1_phase2_Min', 'Var1_phase3_Avg', 'Var1_phase3_Max', 'Var1_phase3_Min', 'Var1_phase1_Avg', 'Var1_phase1_Max', 'Var1_phase1_Min', 'Var1_phase4_Avg', 'Var1_phase4_Max', 'Var1_phase4_Min', 'Var2_phase2_Avg', 'Var2_phase2_Max', 'Var2_phase3_Avg', 'Var2_phase3_Max', 'Var2_phase1_Avg', 'Var2_phase1_Max', 'Var2_phase4_Avg', 'Var2_phase4_Max', 'Var3_phase2_Avg', 'Var3_phase2_Max', 'Var3_phase2_Min', 'Var3_phase3_Avg', 'Var3_phase3_Max', 'Var3_phase3_Min', 'Var3_phase1_Avg', 'Var3_phase1_Max', 'Var3_phase1_Min', 'Var3_phase4_Avg', 'Var3_phase4_Max', 'Var3_phase4_Min', 'Var4_phase2_Avg', 'Var4_phase2_Max', 'Var4_phase3_Avg', 'Var4_phase3_Max', 'Var4_phase1_Avg', 'Var4_phase1_Max', 'Var4_phase4_Avg', 'Var4_phase4_Max', 'Var5_phase2_Avg', 'Var5_phase2_Max', 'Var5_phase2_Min', 'Var5_phase3_Avg', 'Var5_phase3_Max', 'Var5_phase3_Min', 'Var5_phase1_Avg', 'Var5_phase1_Max', 'Var5_phase1_Min', 'Var5_phase4_Avg', 'Var5_phase4_Max', 'Var5_phase4_Min', 'Var6_phase2_Avg', 'Var6_phase2_Max', 'Var6_phase2_Min', 'Var6_phase3_Avg', 'Var6_phase3_Max', 'Var6_phase1_Avg', 'Var6_phase1_Max', 'Var7_phase2_Avg', 'Var7_phase2_Max', 'Var7_phase2_Min', 'Var7_phase3_Avg', 'Var7_phase3_Max', 'Var7_phase3_Min', 'Var7_phase1_Avg', 'Var7_phase1_Max', 'Var7_phase1_Min', 'Var7_phase4_Avg', 'Var7_phase4_Max', 'Var7_phase4_Min', 'Var8_phase2_Avg', 'Var8_phase2_Max', 'Var8_phase2_Min', 'Var8_phase3_Avg', 'Var8_phase3_Max', 'Var8_phase3_Min', 'Var8_phase1_Avg', 'Var8_phase1_Max', 'Var8_phase1_Min', 'Var8_phase4_Avg', 'Var8_phase4_Max', 'Var8_phase4_Min']

# Statistic suffixes in the left-to-right order of the grid columns.
sufijos = ('Max', 'Avg', 'Min')
sufijos_con_guion = tuple('_' + sufijo for sufijo in sufijos)

# One grid row per measurement name (e.g. 'Var1_phase2'). The names are taken
# from every column carrying one of the suffixes, not only from '_Avg', so a
# measurement that lacks an average column still gets a row; the set removes
# the duplicates this produces and sorting gives a deterministic row order.
filas = sorted({
    colum.rsplit('_', 1)[0]
    for colum in columnas_numeric
    if colum.endswith(sufijos_con_guion)
})

# squeeze=False always returns a 2-D array of axes, so the nested zips below
# also work when there is only one measurement. The figure height scales with
# the number of rows (3 inches per row, the same ratio as 96 inches for 32 rows).
fig, axes = plt.subplots(
    nrows=len(filas),
    ncols=len(sufijos),
    figsize=(20, 3 * len(filas)),
    squeeze=False,
)

for ax_row, fila in zip(axes, filas):
    for ax, suffix in zip(ax_row, sufijos):
        colum = fila + '_' + suffix
        if colum not in columnas_numeric:
            # No data for this statistic: drop the empty subplot instead of
            # leaving a blank frame in the grid.
            ax.remove()
            continue
        sns.histplot(
            data=df_agg_new,
            x=colum,
            stat="count",
            kde=True,
            line_kws={'linewidth': 2},
            alpha=0.3,
            ax=ax,
        )
        ax.set_title(colum, fontsize=7, fontweight="bold")
        # labelbottom=True keeps the x tick labels visible even if the axes are
        # later created with sharex=True.
        ax.tick_params(labelsize=6, labelbottom=True)
        ax.set_xlabel("")

fig.tight_layout()
# Reserve space at the top of the figure for the overall title.
plt.subplots_adjust(top=0.97)
fig.suptitle('Distribution plots', fontsize=10, fontweight="bold")
plt.show()

相关问题