排序

Contents

排序

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from plotnine import *

Lollipop

# Load the mpg sample data from the local CSV file and preview the
# first five rows.
mpg = pl.read_csv("data/mpg.csv")
mpg.head(5)
shape: (5, 11)
manufacturer  model  displ  year  cyl  trans         drv  cty  hwy  fl   class
str           str    f64    i64   i64  str           str  i64  i64  str  str
"audi"        "a4"   1.8    1999  4    "auto(l5)"    "f"  18   29   "p"  "compact"
"audi"        "a4"   1.8    1999  4    "manual(m5)"  "f"  21   29   "p"  "compact"
"audi"        "a4"   2.0    2008  4    "manual(m6)"  "f"  20   31   "p"  "compact"
"audi"        "a4"   2.0    2008  4    "auto(av)"    "f"  21   30   "p"  "compact"
"audi"        "a4"   2.8    1999  6    "auto(l5)"    "f"  16   26   "p"  "compact"
# Average city mileage ("cty") per manufacturer: keep only the two columns
# involved, group by manufacturer, and take the mean of "cty".
cty_by_maker = mpg.select("cty", "manufacturer").group_by("manufacturer")
mpg_group = cty_by_maker.agg(pl.col("cty").mean())
mpg_group.head(5)
shape: (5, 2)
manufacturer  cty
str           f64
"honda"       24.444444
"jeep"        13.5
"toyota"      18.529412
"ford"        14.0
"subaru"      19.285714
# Order manufacturers from lowest to highest average city mileage, then
# prepend a row-number column so each row carries its rank position.
mpg_group = mpg_group.sort("cty")
mpg_group = mpg_group.with_row_index()
mpg_group.head(5)
shape: (5, 3)
index  manufacturer  cty
u32    str           f64
0      "lincoln"     11.333333
1      "land rover"  11.5
2      "dodge"       13.135135
3      "mercury"     13.25
4      "jeep"        13.5
# Lollipop chart of average city mileage per manufacturer.
# Each lollipop is a point (the head) plus a segment from y=0 up to the
# value (the stem), with the value printed just above the head.
lollipop_base = ggplot(mpg_group, aes(x="manufacturer", y="cty", label="cty"))
lollipop_heads = geom_point(size=3, color="firebrick")
lollipop_stems = geom_segment(
    aes(x="manufacturer", xend="manufacturer", y=0, yend="cty"), color="firebrick"
)
# nudge_y lifts the text off the point; format_string rounds to 2 decimals.
value_labels = geom_text(color="black", size=4, nudge_y=1, format_string="{:.2f}")

(
    lollipop_base
    + lollipop_heads
    + lollipop_stems
    + value_labels
    + labs(title="Lollipop Chart", subtitle="Manufacturer Vs Avg. Mileage")
    # Use the sorted frame's manufacturer order for the x axis so the
    # lollipops appear in ascending order of mileage.
    + scale_x_discrete(limits=mpg_group["manufacturer"])
    + theme(axis_text_x=element_text(angle=45, vjust=1))
    + ylim(0, 30)
)

Slope

# Load the per-continent GDP-per-capita table from the local CSV file and
# preview the first five rows.
gdp = pl.read_csv("data/gdp_per_cap.csv")
gdp.head(5)
shape: (5, 3)
continent   1952         1957
str         f64          f64
"Africa"    1252.572466  1385.236062
"Americas"  4079.062552  4616.043733
"Asia"      5195.484004  4003.13294
"Europe"    5661.057435  6963.012816
"Oceania"   10298.08565  11598.522455
# Reshape from wide (one column per year) to long form: one row per
# (continent, year) pair, with the year in "time" and the value in "total".
gdp_long = gdp.unpivot(
    index="continent",
    on=["1952", "1957"],
    variable_name="time",
    value_name="total",
)
# Store the continent names as a categorical column.
gdp_new = gdp_long.with_columns(pl.col("continent").cast(pl.Categorical))
gdp_new
shape: (10, 3)
continent   time    total
cat         str     f64
"Africa"    "1952"  1252.572466
"Americas"  "1952"  4079.062552
"Asia"      "1952"  5195.484004
"Europe"    "1952"  5661.057435
"Oceania"   "1952"  10298.08565
"Africa"    "1957"  1385.236062
"Americas"  "1957"  4616.043733
"Asia"      "1957"  4003.13294
"Europe"    "1957"  6963.012816
"Oceania"   "1957"  11598.522455
# Slope chart drawn with matplotlib: one line per continent joining its
# 1952 value to its 1957 value.
_, ax = plt.subplots(figsize=(10, 5))

# Iterate over the DISTINCT continents, in first-appearance order.
# Looping over the raw column would visit every continent once per year,
# drawing each line twice and burning through twice as many colours.
continents = gdp_new["continent"].unique(maintain_order=True).to_list()

for cont in continents:
    gdp_newi = gdp_new.filter(pl.col("continent") == cont)
    # The label attached here is what the legend picks up below.
    ax.plot(gdp_newi["time"], gdp_newi["total"], label=cont)

# Dotted vertical guides at the two x positions, spanning the full data
# range. The range does not change between iterations, so compute it once.
total_min = gdp_new["total"].min()
total_max = gdp_new["total"].max()
for i in [0, 1]:
    ax.vlines(
        x=i,
        ymin=total_min,
        ymax=total_max,
        colors="k",
        linestyle="dotted",
    )

ax.set(
    xlabel="Time",
    ylabel="Mean GDP Per Capita",
    title="Slopechart: Comparing GDP Per Capita between 1952 vs 1957\n",
    xticks=[0, 1],
    xticklabels=["1952", "1957"],
)

# Let the legend use the labels set in ax.plot so each entry is guaranteed
# to match its line; passing a separately computed label list relies on its
# order happening to agree with the plotting order.
ax.legend(
    loc="center",
    bbox_to_anchor=(0.5, 1),
    ncol=len(continents),
    fontsize="small",
)
ax.spines[["right", "top", "left"]].set_visible(False)
# Tag each continent by the direction of change between the two years:
# "red" when the 1957 value is below the 1952 value, "green" otherwise.
gdp_fell = gdp["1957"] - gdp["1952"] < 0
gdp = gdp.with_columns(Class=np.where(gdp_fell, "red", "green"))
gdp
shape: (5, 4)
continent   1952         1957          Class
str         f64          f64           str
"Africa"    1252.572466  1385.236062   "green"
"Americas"  4079.062552  4616.043733   "green"
"Asia"      5195.484004  4003.13294    "red"
"Europe"    5661.057435  6963.012816   "green"
"Oceania"   10298.08565  11598.522455  "green"
# Slope chart drawn with plotnine: one segment per continent from its 1952
# value (at x=1) to its 1957 value (at x=2), coloured by the "Class" column.
slope_segments = geom_segment(
    aes(x=1, xend=2, y="1952", yend="1957", color="Class"), size=0.75
)

# Map the two "Class" values onto explicit colours.
direction_colors = scale_color_manual(
    labels=["Up", "Down"], values={"green": "#00ba38", "red": "#f8766d"}
)

# Strip the panel decoration and hide the legend so only the slopes and
# the two dashed guide lines remain.
bare_theme = theme(
    panel_background=element_blank(),
    panel_grid=element_blank(),
    axis_ticks=element_blank(),
    # axis_text_x=element_blank(),
    panel_border=element_blank(),
    legend_position="none",
)

(
    ggplot(gdp)
    + slope_segments
    + geom_vline(xintercept=1, linetype="dashed", size=0.1)
    + geom_vline(xintercept=2, linetype="dashed", size=0.1)
    + direction_colors
    + labs(x="", y="Mean GdpPerCap")
    # Leave horizontal room on either side of the two guide lines.
    + xlim(0.5, 2.5)
    + bare_theme
)