分组

分组#

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from matplotlib import patches
from scipy.spatial import ConvexHull

Bar#

# Load the mpg dataset from a local CSV file into a polars DataFrame.
mpg = pl.read_csv("data/mpg.csv")
# Trailing expression: the notebook renders the first rows of the frame.
mpg.head()
shape: (5, 11)
manufacturermodeldisplyearcyltransdrvctyhwyflclass
strstrf64i64i64strstri64i64strstr
"audi""a4"1.819994"auto(l5)""f"1829"p""compact"
"audi""a4"1.819994"manual(m5)""f"2129"p""compact"
"audi""a4"2.020084"manual(m6)""f"2031"p""compact"
"audi""a4"2.020084"auto(av)""f"2130"p""compact"
"audi""a4"2.819996"auto(l5)""f"1626"p""compact"
# Mean of `cty` per manufacturer, sorted ascending by that mean.
mpg_group = (
    mpg.select("cty", "manufacturer")
    .group_by("manufacturer")
    .mean()
    .sort("cty")
)
mpg_group.head()
shape: (5, 2)
manufacturercty
strf64
"lincoln"11.333333
"land rover"11.5
"dodge"13.135135
"mercury"13.25
"jeep"13.5
# Bar chart of the per-manufacturer mean `cty` held in `mpg_group`
# (already sorted ascending, so bars grow from left to right).
fig, ax = plt.subplots(figsize=(10, 6), facecolor="white")

x = mpg_group["manufacturer"].str.to_uppercase()
y = mpg_group["cty"]

ax.bar(x=x, height=y)

# Annotate every bar with its value rounded to one decimal, centred
# horizontally and placed 0.5 data units above the bar top.
for i, cty in enumerate(y):
    ax.text(i, cty + 0.5, round(cty, 1), horizontalalignment="center")

# Two translucent bands along the bottom of the figure. With
# `transform=fig.transFigure` the anchor, width and height are expressed in
# figure-fraction coordinates rather than data units: the green band covers
# the right-hand part of the figure, the red band the left-hand part.
p1 = patches.Rectangle(
    (0.57, -0.005),
    width=0.33,
    height=0.13,
    alpha=0.1,
    facecolor="green",
    transform=fig.transFigure,
)
p2 = patches.Rectangle(
    (0.124, -0.005),
    width=0.446,
    height=0.13,
    alpha=0.1,
    facecolor="red",
    transform=fig.transFigure,
)
fig.add_artist(p1)
fig.add_artist(p2)

plt.setp(ax.get_xticklabels(), rotation=60, horizontalalignment="right")
# The plotted column is `cty` (the dataset has a separate `hwy` column), so
# the title names city mileage rather than highway mileage.
ax.set(ylim=(0, 30), ylabel="Miles Per Gallon", title="Bar Chart for City Mileage")
[(0.0, 30.0),
 Text(0, 0.5, 'Miles Per Gallon'),
 Text(0.5, 1.0, 'Bar Chart for Highway Mileage')]
../_images/df5ab930025ea759f4885a5633780b9d5ee9551075f50bda9258dac359dc8053.png

Dendrogram#

# SciPy's hierarchical-clustering module; `linkage` and `dendrogram` from it
# are used in the plotting cell below.
from scipy.cluster import hierarchy
# Load the arrests dataset from a local CSV file into a polars DataFrame.
arrests = pl.read_csv("data/us_arrests.csv")
# Trailing expression: the notebook renders the first rows of the frame.
arrests.head()
shape: (5, 5)
MurderAssaultUrbanPopRapeState
f64i64i64f64str
13.22365821.2"Alabama"
10.02634844.5"Alaska"
8.12948031.0"Arizona"
8.81905019.5"Arkansas"
9.02769140.6"California"
_, ax = plt.subplots(figsize=(10, 6))

# Ward linkage over the four numeric columns (one row per state). The frame is
# converted to a NumPy array explicitly, matching how the labels are passed.
dend = hierarchy.dendrogram(
    hierarchy.linkage(
        arrests[["Murder", "Assault", "UrbanPop", "Rape"]].to_numpy(),
        method="ward",
    ),
    labels=arrests["State"].to_numpy(),
    color_threshold=100,
    # Draw on the axes created above instead of whatever pyplot considers
    # the current axes.
    ax=ax,
)

ax.set(title="USArrests Dendrograms")
plt.show()
../_images/5ac7af1ef2db95cf75cb6c381ab683fbbd7b0d6032ff09e2dd03389a3f4574d3.png

Encircling#

midwest = pl.read_csv("data/midwest_filter.csv")
# `with_columns` does not modify the frame in place; it returns a new
# DataFrame. The result is bound back to `midwest` so the recomputed
# `popdensity` and the categorical `state` column are actually kept.
midwest = midwest.with_columns(
    popdensity=pl.col("poptotal") / 100,
    state=pl.col("state").cast(pl.Categorical),
)
# Trailing expression: the notebook renders the first rows of the frame.
midwest.head()
shape: (5, 29)
PIDcountystateareapoptotalpopdensitypopwhitepopblackpopamerindianpopasianpopotherpercwhitepercblackpercamerindanpercasianpercotherpopadultsperchsdpercollegepercprofpoppovertyknownpercpovertyknownpercbelowpovertypercchildbelowpovertpercadultpovertypercelderlypovertyinmetrocategorydot_size
i64strstrf64i64f64i64i64i64i64i64f64f64f64f64f64i64f64f64f64i64f64f64f64f64f64i64strf64
561"ADAMS""IL"0.052660901270.961546391717029824912496.7120592.5752760.1482830.3767590.1876234329875.10739519.6313924.3558596362896.27477713.15144318.01171711.00977612.4438120"AAR"250.944411
562"ALEXANDER""IL"0.01410626759.0705434961948966.3843432.9004330.17880670.4517220.084698672459.72635311.2433082.8703151052999.08714532.24427845.82651427.38564725.2289760"LHR"185.78126
563"BOND""IL"0.02214991681.4090911447742935163496.5712762.8617170.2334730.1067310.226803966969.33498817.0338194.4885721423594.95697412.06884414.03606110.8520912.697410"AAR"175.905385
564"BOONE""IL"0.017308061812.117652934412746150113995.2541710.4122570.1493220.4869183.6973321927275.47218817.2789544.19783033798.4775697.20901911.1795365.5360136.2170471"ALU"319.823487
565"BROWN""IL"0.0185836324.2222225264547145690.1987669.3728580.239890.0856750.10281397968.86152314.4759993.36768481582.5051413.52024913.02288911.14321119.20"AAR"130.442161
# Keep only the rows whose `state` equals "IN"; this subset supplies the
# points that are encircled in the scatter plot further down.
midwest_select = midwest.filter(pl.col("state") == "IN")
# Trailing expression: the notebook renders the first rows of the subset.
midwest_select.head()
shape: (5, 29)
PIDcountystateareapoptotalpopdensitypopwhitepopblackpopamerindianpopasianpopotherpercwhitepercblackpercamerindanpercasianpercotherpopadultsperchsdpercollegepercprofpoppovertyknownpercpovertyknownpercbelowpovertypercchildbelowpovertpercadultpovertypercelderlypovertyinmetrocategorydot_size
i64strstrf64i64f64i64i64i64i64i64f64f64f64f64f64i64f64f64f64i64f64f64f64f64f64i64strf64
663"ADAMS""IN"0.021310951480.714293053036426042798.1829880.1157740.135070.1929571.3732111811974.39704216.115684.8622993049098.0543511.63660217.1945249.1018888.7140271"AAU"277.642023
665"BARTHOLOMEW""IN"0.022636572893.56177410059761017197.0419591.5787740.1523790.9582610.2686274121876.90329522.8468156.8440976278498.6285888.54517110.7368556.9924210.8119430"AAR"457.463283
666"BENTON""IN"0.0249441393.375938961612999.4492110.0635530.1694740.01059210.307171605377.08574313.4643984.014538930098.5065148.0430118.3492186.84232910.5022830"AAR"139.24402
667"BLACKFORD""IN"0.01140671406.713978744162299.3673140.0497620.31278880.1137410.1563944925972.98844412.9819634.4281241390398.8341519.85398812.3237458.33224710.93750"AAR"268.221385
668"BOONE""IN"0.024381471589.45833378148390946699.1270610.2175790.2359290.2464150.1730152491582.46839327.8306248.8139673740298.0470296.2964558.0217545.2395997.0894251"HLU"291.48311
_, ax = plt.subplots(figsize=(10, 8))

# One colour per distinct category, spread evenly over the tab10 colormap.
# The divisor is clamped to 1 so a frame with a single category does not
# raise ZeroDivisionError.
categories = np.unique(midwest["category"])
colors = [
    plt.cm.tab10(i / max(len(categories) - 1, 1)) for i in range(len(categories))
]

# One scatter call per category so each gets its own colour and legend entry.
# The string arguments are column names looked up in the frame passed as `data`.
for i, category in enumerate(categories):
    # Draw on the explicit axes rather than through pyplot's implicit
    # "current axes" state.
    ax.scatter(
        "area",
        "poptotal",
        data=midwest.filter(pl.col("category") == category),
        s="dot_size",
        color=colors[i],
        label=str(category),
        edgecolors="black",
        linewidths=0.5,
    )


# Encircling
def encircle(x, y, ax=None, **kw):
    """Draw the convex hull of the points ``(x, y)`` as a polygon patch.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, same length as ``x``.
        ax: Axes to draw on. When ``None``, the current pyplot axes are used.
        **kw: Forwarded unchanged to ``matplotlib.patches.Polygon``
            (for example ``ec``, ``fc``, ``alpha``, ``lw``).
    """
    # Explicit None check: only a missing axes argument falls back to the
    # current axes, instead of relying on the truthiness of `ax`.
    if ax is None:
        ax = plt.gca()
    # One (x, y) row per input point.
    p = np.stack([x, y], axis=1)
    hull = ConvexHull(p)
    # `hull.vertices` holds the indices of the input points on the hull.
    poly = patches.Polygon(xy=p[hull.vertices, :], closed=True, **kw)
    ax.add_patch(poly)


# Fix the visible axis ranges and set the axis labels and title.
ax.set(
    xlim=(0.0, 0.1),
    ylim=(0, 90000),
    xlabel="Area",
    ylabel="Population",
    title="Bubble Plot with Encircling",
)

# Coordinates of the rows in `midwest_select` (the state == "IN" subset).
x = midwest_select["area"]
y = midwest_select["poptotal"]

# Draw polygon surrounding vertices: first a faint gold fill with a black
# edge, then an unfilled firebrick outline over the same hull.
encircle(x, y, ec="k", fc="gold", alpha=0.1, ax=ax)
encircle(x, y, ec="firebrick", fc="none", lw=1.5, ax=ax)
# Trailing expression: builds the legend from the labelled scatter artists;
# the notebook renders the returned Legend object's repr.
ax.legend()
<matplotlib.legend.Legend at 0x7fd0c4c674d0>
../_images/421d0b1575a018d427577c4963d560b8fde6613339f637e98ad4cd94137d76dc.png