偏离#

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from plotnine import *

Diverging Bar#

# Load the mtcars table from the local CSV and preview its first five rows.
mtcars = pl.read_csv(source="data/mtcars.csv")
mtcars.head(n=5)
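shape: (5, 13)

| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | fast | cars |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| f64 | i64 | f64 | i64 | f64 | f64 | f64 | i64 | i64 | i64 | i64 | i64 | str |
| 4.582576 | 6 | 160.0 | 110 | 3.9 | 2.62 | 16.46 | 0 | 1 | 4 | 4 | 1 | "Mazda RX4" |
| 4.582576 | 6 | 160.0 | 110 | 3.9 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | 1 | "Mazda RX4 Wag" |
| 4.774935 | 4 | 108.0 | 93 | 3.85 | 2.32 | 18.61 | 1 | 1 | 4 | 1 | 1 | "Datsun 710" |
| 4.626013 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | 1 | "Hornet 4 Drive" |
| 4.32435 | 8 | 360.0 | 175 | 3.15 | 3.44 | 17.02 | 0 | 0 | 3 | 2 | 1 | "Hornet Sportabout" |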
# Standardize mileage: subtract the column mean and divide by the column
# standard deviation, so negative values are below-average cars.
mtcars = mtcars.with_columns(
    mpg_z=(pl.col("mpg") - pl.col("mpg").mean()) / pl.col("mpg").std()
)

# Tag each row with the color used by the plots ("red" below average,
# "green" otherwise), order the rows by z-score, and add an "index" column
# that serves as the y position in the matplotlib charts.
# A native when/then/otherwise expression replaces the former
# map_batches + np.where round trip through a Python lambda.
mtcars_sorted = (
    mtcars.with_columns(
        types=pl.when(pl.col("mpg_z") < 0)
        .then(pl.lit("red"))
        .otherwise(pl.lit("green"))
    )
    .sort(by="mpg_z")
    .with_row_index()
)
mtcars_sorted.head()
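shape: (5, 16)

| index | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | fast | cars | mpg_z | types |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| u32 | f64 | i64 | f64 | i64 | f64 | f64 | f64 | i64 | i64 | i64 | i64 | i64 | str | f64 | str |
| 0 | 3.224903 | 8 | 472.0 | 205 | 2.93 | 5.25 | 17.98 | 0 | 0 | 3 | 4 | 0 | "Cadillac Fleetwood" | -1.829979 | "red" |
| 1 | 3.224903 | 8 | 460.0 | 215 | 3.0 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | 0 | "Lincoln Continental" | -1.829979 | "red" |
| 2 | 3.646917 | 8 | 350.0 | 245 | 3.73 | 3.84 | 15.41 | 0 | 0 | 3 | 4 | 0 | "Camaro Z28" | -1.191664 | "red" |
| 3 | 3.781534 | 8 | 360.0 | 245 | 3.21 | 3.57 | 15.84 | 0 | 0 | 3 | 4 | 0 | "Duster 360" | -0.988049 | "red" |
| 4 | 3.834058 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | 0 | "Chrysler Imperial" | -0.908604 | "red" |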
# Diverging bar chart: one horizontal bar per car, placed at its sorted row
# index, with length equal to the mileage z-score and color taken from the
# precomputed "types" column.
_, ax = plt.subplots(figsize=(10, 6))

bar = ax.barh(
    mtcars_sorted["index"],
    mtcars_sorted["mpg_z"],
    color=mtcars_sorted["types"],
    linewidth=5,
    alpha=0.4,
)

# Print each bar's value just past its end.
ax.bar_label(bar, fmt="%0.2g", label_type="edge", padding=3)

# Axis text; tick positions are set before their labels.
ax.set_xlabel("Mileage")
ax.set_ylabel("Model")
ax.set_yticks(mtcars_sorted["index"])
ax.set_yticklabels(mtcars_sorted["cars"])
ax.set_title("Diverging Bars of Car Mileage")

ax.tick_params(axis="both", labelsize="medium")
ax.grid(linestyle="--", alpha=0.5)
../_images/350b47f669c4ddb3f4841192e18877df7faeb52f02eb8e9012a3c551a41e27b8.png

ggplot2 version#

# Base plot reused by the later plotnine charts: car name on x, z-score on y,
# and the z-score again as the text label for layers that draw labels.
g = ggplot(data=mtcars_sorted, mapping=aes(x="cars", y="mpg_z", label="mpg_z"))

# Bars filled by the "types" column; the x limits pin the car order to the
# z-score ordering of mtcars_sorted, and coord_flip makes the bars horizontal.
(
    g
    + geom_bar(mapping=aes(fill="types"), stat="identity", width=0.5)
    + coord_flip()
    + scale_x_discrete(limits=mtcars_sorted["cars"])
    + scale_fill_manual(
        values=dict(green="#00ba38", red="#f8766d"),
        labels=["Above Average", "Below Average"],
        name="Mileage",
    )
    + labs(x="Model", y="Mileage", title="Diverging Bars of Car Mileage")
    + theme(plot_title=element_text(hjust=0.5))
)

Diverging Lollipop#

# Diverging lollipop chart: a dot at each car's mileage z-score plus a thin
# horizontal bar from zero acting as the stem. Rows are positioned by the
# "index" column of the z-score-sorted table.
_, ax = plt.subplots(figsize=(10, 6))

# Lollipop heads.
ax.scatter(
    mtcars_sorted["mpg_z"],
    mtcars_sorted["index"],
    color=mtcars_sorted["types"],
    s=200,
    alpha=0.6,
)

# Lollipop stems: very thin bars (height=0.1) reaching from 0 to the z-score.
ax.barh(
    y=mtcars_sorted["index"],
    width=mtcars_sorted["mpg_z"],
    color=mtcars_sorted["types"],
    alpha=0.4,
    height=0.1,
)

# Bracket annotation anchored at x=0, y=11 in data coordinates, i.e. around
# row 11 of the sorted table.
# NOTE(review): presumably these are the Mercedes rows; verify against the data.
ax.annotate(
    "Mercedes Models",
    xy=(0.0, 11.0),
    xytext=(1.0, 11),
    xycoords="data",
    fontsize="medium",
    ha="center",
    va="center",
    color="white",
    bbox={"boxstyle": "square", "fc": "firebrick"},
    arrowprops={
        "arrowstyle": "-[, widthB=2.0, lengthB=1.5",
        "lw": 2.0,
        "color": "steelblue",
    },
)

# Tick labels must come from the same sorted table that positions the dots
# and stems; taking them from the unsorted frame puts the wrong car name
# next to each row.
ax.set(
    xlabel="Mileage",
    ylabel="Model",
    yticks=mtcars_sorted["index"],
    yticklabels=mtcars_sorted["cars"],
    title="Diverging Lollipop Chart of Car Mileage",
)
ax.tick_params(axis="both", labelsize="medium")
ax.grid(linestyle="--", alpha=0.5)
../_images/e6b1e154a48983fb7fc6c0b110ec52de562f28a92465cf4b293cb2adcae59ffe.png

ggplot2 version#

# Lollipop chart on top of the base plot `g`: points for the heads, then
# segments from y=0 to the z-score for the stems. Fill and color scales share
# the same palette and legend text.
(
    g
    + geom_point(mapping=aes(fill="types"), stat="identity", size=8)
    + geom_segment(
        mapping=aes(x="cars", xend="cars", y=0, yend="mpg_z", color="types")
    )
    + scale_fill_manual(
        values=dict(green="#00ba38", red="#f8766d"),
        labels=["Above Average", "Below Average"],
        name="Mileage",
    )
    + scale_color_manual(
        values=dict(green="#00ba38", red="#f8766d"),
        labels=["Above Average", "Below Average"],
        name="Mileage",
    )
    + scale_x_discrete(limits=mtcars_sorted["cars"])
    + ylim(-2.5, 2.5)
    + coord_flip()
    + labs(
        x="Model",
        y="Mileage",
        title="Diverging Lollipop Chart",
        subtitle="Normalized mileage from 'mtcars': Lollipop",
    )
)

Diverging Dot#

# Diverging dot plot: one large dot per car at (z-score, sorted row index),
# with the rounded z-score printed inside the dot.
_, ax = plt.subplots(figsize=(10, 6))

scat = ax.scatter(
    x=mtcars_sorted["mpg_z"],
    y=mtcars_sorted["index"],
    color=mtcars_sorted["types"],
    s=450,
    alpha=0.6,
)

# The printed value is the dot's own x coordinate, so a single column
# supplies both the position and the text.
for z_score, row in zip(mtcars_sorted["mpg_z"], mtcars_sorted["index"]):
    ax.text(
        z_score,
        row,
        round(z_score, 1),
        ha="center",
        va="center",
        color="white",
    )

# Hide the top and right frame lines.
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

# Axis text; tick positions are set before their labels.
ax.set_xlabel("Mileage")
ax.set_ylabel("Model")
ax.set_yticks(mtcars_sorted["index"])
ax.set_yticklabels(mtcars_sorted["cars"])
ax.set_title("Diverging Dotplot of Car Mileage")

ax.tick_params(axis="both", labelsize="medium")
ax.grid(linestyle="--", alpha=0.5)
../_images/dc6104324eff5e2179ca5979de6fabedca6a4775ca2968da757724136dac78e8.png
# Dot plot on top of the base plot `g`: filled points first, then the
# z-score drawn over each point as white text with two decimals.
(
    g
    + geom_point(mapping=aes(fill="types"), stat="identity", size=8)
    + geom_text(format_string="{:.2f}", size=6, color="white")
    + scale_fill_manual(
        values=dict(green="#00ba38", red="#f8766d"),
        labels=["Above Average", "Below Average"],
        name="Mileage",
    )
    + scale_x_discrete(limits=mtcars_sorted["cars"])
    + ylim(-2.5, 2.5)
    + coord_flip()
    + labs(x="Model", y="Mileage", title="Diverging Dotplot of Car Mileage")
    + theme(plot_title=element_text(hjust=0.5))
)