python繪製雲雨圖raincloud plot

2022-08-04 22:05:25

官方github: https://github.com/RainCloudPlots/RainCloudPlots

Raincloud 的 Python 實現是一個名為 PtitPrince 的包,它建立在 seaborn 之上;seaborn 是一個 Python 繪圖庫,用於從 pandas 資料框中生成漂亮的圖表。

import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
import matplotlib.collections as clt
import ptitprince as pt
# Global switch: when True, figures are written to disk.
savefigs = True
# Folder that receives the exported figures.
figs_dir = '../figs/tutorial_python'
if savefigs:
    # Make the figures folder if it doesn't yet exist
    os.makedirs(figs_dir, exist_ok=True)

def export_fig(axis, text, fname):
    """Save the figure behind ``axis`` to ``fname`` when figure export is on.

    Does nothing unless the module-level ``savefigs`` flag is truthy.

    Args:
        axis: Object to export. Anything exposing ``savefig`` (such as a
            matplotlib ``Figure``) is used directly; otherwise the object's
            ``figure`` attribute is used, so a matplotlib ``Axes`` works too.
        text: Unused; kept so existing call signatures stay valid.
        fname: Destination passed on to ``savefig``.
    """
    if not savefigs:
        return
    # Axes objects have no ``savefig``; the method lives on their parent figure.
    target = axis if hasattr(axis, "savefig") else axis.figure
    target.savefig(fname, bbox_inches='tight')
# Load the simulated dataset that the figures below plot
# (comma-separated; they read its "group", "score" and "gr2" columns).
df = pd.read_csv("simdat.csv", sep=",")

該圖可以讓讀者初步瞭解資料集:哪個組的平均值更大,這種差異是否可能顯著。 此圖中僅顯示每組分數的平均值和標準差。

# Baseline bar plot of "score" per "group", with capped error bars.
f, ax = plt.subplots(figsize=(7, 7))
sns.barplot(x="group", y="score", data=df, capsize=.1)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P1\n Bar Plot")
if savefigs:
    # Forward slashes on purpose: in a normal string literal a backslash path
    # would turn "\f" and "\t" into form-feed and tab characters.
    plt.savefig('../figs/tutorial_python/figureP01.png', bbox_inches='tight')


# plotting the clouds
# Horizontal orientation: numeric column on x, categorical column on y.
dx = "score"; dy = "group"; ort = "h"
f, ax = plt.subplots(figsize=(7, 5))
# Single-colour palette, reused by the following basic raincloud figures.
pal = sns.color_palette(n_colors=1)
ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                        scale="area", width=.6, inner=None, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P2\n Basic Rainclouds")
if savefigs:
    # Forward slashes on purpose: in a normal string literal a backslash path
    # would turn "\f" and "\t" into form-feed and tab characters.
    plt.savefig('../figs/tutorial_python/figureP02.png', bbox_inches='tight')


# adding the rain
# Plot settings restated so the snippet runs on its own
# (horizontal orientation: numeric column on x, categorical column on y).
dx = "score"; dy = "group"; ort = "h"; pal = sns.color_palette(n_colors=1)
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                        scale="area", width=.6, inner=None, orient=ort)
# jitter=0 keeps every data point of a group on one line.
ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                   size=3, jitter=0, zorder=0, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P3\n Raincloud Without Jitter")
if savefigs:
    # Forward slashes on purpose: in a normal string literal a backslash path
    # would turn "\f" and "\t" into form-feed and tab characters.
    plt.savefig('../figs/tutorial_python/figureP03.png', bbox_inches='tight')

# adding jitter to the rain
# Plot settings restated so the snippet runs on its own
# (horizontal orientation: numeric column on x, categorical column on y).
dx = "score"; dy = "group"; ort = "h"; pal = sns.color_palette(n_colors=1)
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                        scale="area", width=.6, inner=None, orient=ort)
# Same strip plot as the un-jittered figure, but with jitter switched on.
ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                   size=3, jitter=1, zorder=0, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P4\n Raincloud with Jittered Data")
if savefigs:
    # Forward slashes on purpose: in a normal string literal a backslash path
    # would turn "\f" and "\t" into form-feed and tab characters.
    plt.savefig('../figs/tutorial_python/figureP04.png', bbox_inches='tight')

這樣可以很好地瞭解資料點的分佈情況,但中位數和四分位數並不明顯,很難一目瞭然地確定統計差異。 因此,我們新增了一個“空”箱線圖來顯示中位數、四分位數和異常值:

#adding the boxplot with quartiles
# Plot settings restated so the snippet runs on its own
# (horizontal orientation: numeric column on x, categorical column on y).
dx = "score"; dy = "group"; ort = "h"; pal = sns.color_palette(n_colors=1)
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                        scale="area", width=.6, inner=None, orient=ort)
ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                   size=3, jitter=1, zorder=0, orient=ort)
# Unfilled ("empty") boxes, drawn with a higher zorder than the strip plot.
ax = sns.boxplot(x=dx, y=dy, data=df, color="black", width=.15, zorder=10,
                 showcaps=True, boxprops={'facecolor': 'none', "zorder": 10},
                 showfliers=True, whiskerprops={'linewidth': 2, "zorder": 10},
                 saturation=1, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P5\n Raincloud with Boxplot")
if savefigs:
    plt.savefig('../figs/tutorial_python/figureP05.png', bbox_inches='tight')


#adding color
# Switch to a named multi-colour palette; the other settings are restated so
# the snippet runs on its own (numeric column on x, categorical column on y).
dx = "score"; dy = "group"; ort = "h"; pal = "Set2"
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                        scale="area", width=.6, inner=None, orient=ort)
ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                   size=3, jitter=1, zorder=0, orient=ort)
# Unfilled ("empty") boxes, drawn with a higher zorder than the strip plot.
ax = sns.boxplot(x=dx, y=dy, data=df, color="black", width=.15, zorder=10,
                 showcaps=True, boxprops={'facecolor': 'none', "zorder": 10},
                 showfliers=True, whiskerprops={'linewidth': 2, "zorder": 10},
                 saturation=1, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P6\n Tweaking the Colour of Your Raincloud")

我們可以使用函數 pt.RainCloud,以單一呼叫自動完成上述步驟:

#same thing with a single command: now x **must** be the categorical value
dx = "group"; dy = "score"; ort = "h"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(7, 5))
# One pt.RainCloud call draws onto the axes passed through ``ax``.
pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
             width_viol=.6, ax=ax, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P7\n Using the pt.Raincloud function")
if savefigs:
    plt.savefig('../figs/tutorial_python/figureP07.png', bbox_inches='tight')

‘move’ 引數可用於將雨點移到箱線圖下方,在某些情況下可讓原始資料更清楚可見:

#moving the rain below the boxplot
dx = "group"; dy = "score"; ort = "h"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(7, 5))
# ``move`` offsets the rain (raw data points) relative to the boxplot.
ax = pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
                  width_viol=.6, ax=ax, orient=ort, move=.2)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P8\n Rainclouds with Shifted Rain")

此外,raincloud 函數同樣適用於列表或 np.array,如果您更喜歡使用它們而不是資料框輸入:

# Usage with a list/np.array input
# x and y are handed over as plain lists, so no ``data`` frame is passed.
dx = list(df["group"]); dy = list(df["score"])
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.RainCloud(x=dx, y=dy, palette=pal, bw=sigma,
                  width_viol=.6, ax=ax, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P9\n Rainclouds with List/Array Inputs")

對於某些資料,您可能希望將雨雲的方向翻轉為“petit prince”圖。 您可以使用 pt.RainCloud 函數中的 ‘orient’ 標誌來執行此操作:

# Changing orientation
# ort="v" flips the raincloud to the vertical layout.
dx = "group"; dy = "score"; ort = "v"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
                  width_viol=.5, ax=ax, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P10\n Flipping your Rainclouds")

還可以更改用於生成資料概率分佈函數的平滑核。 為此,您調整 sigma 引數:

#changing cloud smoothness
# A smaller sigma (passed as ``bw``) than the .2 used in the other figures.
dx = "group"; dy = "score"; ort = "h"; pal = "Set2"; sigma = .05
f, ax = plt.subplots(figsize=(7, 5))
ax = pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
                  width_viol=.6, ax=ax, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P11\n Customizing Raincloud Smoothness")

最後,使用 pointplot 標誌,您可以新增一條連線組平均值的線。 這對於更復雜的資料集很有用,例如重複測量或因子資料。 下面我們通過改變各個圖的色調、不透明度或閃避元素來說明使用雨雲繪製此類資料的幾種不同方法:

#adding a red line connecting the groups' mean value (useful for longitudinal data)
dx = "group"; dy = "score"; ort = "h"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(7, 5))
# pointplot=True switches on the connecting line.
ax = pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
                  width_viol=.6, ax=ax, orient=ort, pointplot=True)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P12\n Adding Lineplots to Emphasize Factorial Effects")

另一個靈活的選擇是使用 Facet Grids 來分隔不同的組或因子水平,如下所示:


# Rainclouds with FacetGrid
# One facet column per value of "gr2"; each facet gets its own raincloud.
g = sns.FacetGrid(df, col="gr2", height=6)
g = g.map_dataframe(pt.RainCloud, x="group", y="score", data=df, orient="h")
# "\n" puts the caption on its own line below the figure number.
g.fig.suptitle("Figure P13\n Using FacetGrid for More Complex Designs",  fontsize=26)


# Hue Input for Subgroups
# "gr2" is passed as ``hue`` to split each group into subgroups.
dx = "group"; dy = "score"; dhue = "gr2"; ort = "h"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(12, 5))
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P14\n Rainclouds with Subgroups")

為了提高該圖的可讀性,我們使用相關標誌(0-1 alpha 強度)調整 alpha 級別:

# Setting alpha level
f, ax = plt.subplots(figsize=(12, 5))
# Same subgroup plot, now with alpha=.65.
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P15\n Adjusting Raincloud Alpha Level")

我們可以將 dodge 標誌設定為 True,而不是讓兩個箱線圖相互遮擋,從而增加互動性:

#The Dodge Flag
f, ax = plt.subplots(figsize=(12, 5))
# Same subgroup plot with dodge=True added.
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65, dodge=True)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P16\n The Boxplot Dodge Flag")



#same, with dodging and line
f, ax = plt.subplots(figsize=(12, 5))
# Combines dodge=True with pointplot=True.
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65,
                  dodge=True, pointplot=True)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P17\n Dodged Boxplots with Lineplots")


#moving the rain under the boxplot
f, ax = plt.subplots(figsize=(12, 5))
# Dodged boxplots and connecting line as before, plus move=.2 for the rain.
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65, dodge=True,
                  pointplot=True, move=.2)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P18\n Shifting the Rain with the Move Parameter")

作為我們的最後一個範例,我們將考慮具有兩組和三個時間點的複雜重複測量設計。 目標是呈現複雜的交互作用和主效應,同時保持雨雲圖的透明性:

# Load in the repeated data
df_rep = pd.read_csv("repeated_measures_data.csv", sep=",")
# Rename the columns positionally to the names the plots below refer to.
df_rep.columns = ["score", "timepoint", "group"]

# Plot the repeated measures data
# Groups on the categorical axis, time points as hue.
dx = "group"; dy = "score"; dhue = "timepoint"; ort = "h"; pal = "Set2"; sigma = .2
f, ax = plt.subplots(figsize=(12, 5))
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df_rep, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65, dodge=True,
                  pointplot=True, move=.2)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P19\n Repeated Measures Data - Example 1")

# Now with the group as hue
# Roles swapped: time points on the categorical axis, groups as hue.
dx = "timepoint"; dy = "score"; dhue = "group"
f, ax = plt.subplots(figsize=(12, 5))
ax = pt.RainCloud(x=dx, y=dy, hue=dhue, data=df_rep, palette=pal, bw=sigma,
                  width_viol=.7, ax=ax, orient=ort, alpha=.65, dodge=True,
                  pointplot=True, move=.2)
# "\n" puts the caption on its own line below the figure number.
plt.title("Figure P20\n  Repeated Measures Data - Example 2")

到此這篇關於python繪製雲雨圖raincloud plot的文章就介紹到這了,更多相關python繪製雲雨圖內容請搜尋it145.com以前的文章或繼續瀏覽下面的相關文章希望大家以後多多支援it145.com!

