import numpy as np
import pandas as pd
import altair as alt
# --- Parameters ---
# Element counts (n) to evaluate, one heatmap column each.
n_elements = [
    1.0 * 10**4,
    6.1 * 10**8,
    2.6 * 10**18,
    7.4 * 10**32,
    4.8 * 10**37,
    8.9 * 10**56,
    1.6 * 10**76,
]
# Exponents p; the corresponding output space has d = 2**p values.
output_powers = [
    32,
    64,
    128,
    224,
    256,
    384,
    512,
]
n_output_dims = [2**p for p in output_powers]


def collision_percent(n, d):
    """Return the approximate collision probability, in percent.

    Uses the approximation P ≈ 1 − exp(−n(n−1) / 2d).

    ``-expm1(-x)`` is used instead of ``1 - exp(-x)``: the two are
    mathematically identical, but the latter loses all precision (and
    collapses to exactly 0) when the exponent is very small.

    Args:
        n: Number of elements.
        d: Number of possible output values.

    Returns:
        The probability as a percentage in the range [0, 100].
    """
    exponent = n * (n - 1) / (2 * d)
    return -np.expm1(-exponent) * 100


# --- Build long-form DataFrame ---
# One row per (n, d) combination, in n-major order.
rows = []
for n in n_elements:
    for power, d in zip(output_powers, n_output_dims):
        rows.append(
            {
                "Elements (n)": n,
                "Output Dimensions (d)": d,
                "Output Dimension": power,
                "Collision %": round(collision_percent(n, d), 1),
            }
        )
df = pd.DataFrame(rows)
# String labels for ordinal axes
def sci_notation(x, digits=1):
    """Format a number in scientific notation, e.g. ``6.1e+08``.

    The mantissa keeps ``digits`` decimal places so that values such as
    2.6e18 are not shown as "3e+18", and so that distinct values are less
    likely to collapse into the same axis label.

    Args:
        x: The number to format.
        digits: Number of decimal places kept in the mantissa.

    Returns:
        The formatted string, e.g. ``"1.0e+04"`` for 10000.
    """
    return f"{float(x):.{digits}e}"
# Display columns: the nominal axes and the in-cell text use strings.
df["n_label"] = df["Elements (n)"].map(sci_notation)
df["d_label"] = df["Output Dimension"].map(lambda power: f"2^{power}")
df["cell_text"] = df["Collision %"].map(lambda pct: f"{pct:.1f}%")
# Explicit axis orderings.
# y axis: labels in the order of output_powers (lowest exponent at the top).
output_powers_sorted = [f"2^{power}" for power in output_powers]
# x axis: element counts in reverse list order (largest first, left to right).
elements_sorted = [sci_notation(count) for count in reversed(n_elements)]
# --- Shared encodings (TRANSPOSE axes) ---
# Element counts run along x and hash output sizes along y; both layers
# below reuse these positional encodings.
x_encoding = alt.X(
    "n_label:N",
    sort=elements_sorted,
    title="Number of Elements (n)",
    axis=alt.Axis(labelAngle=-45),
)
y_encoding = alt.Y(
    "d_label:N",
    sort=output_powers_sorted,
    title="Output Dimension of Hash (d)",
)
base = alt.Chart(df).encode(x=x_encoding, y=y_encoding)
# --- Heatmap layer ---
# One rectangle per (n, d) cell, coloured green -> yellow -> red as the
# collision percentage goes 0 -> 50 -> 100.
heatmap = base.mark_rect(stroke="white", strokeWidth=1).encode(
    color=alt.Color(
        "Collision %:Q",
        scale=alt.Scale(domain=[0, 50, 100], range=["#2ecc71", "#f1c40f", "#e74c3c"]),
        legend=alt.Legend(title="Collision %"),
    ),
    tooltip=[
        alt.Tooltip("Elements (n):Q", title="Elements (n)"),
        alt.Tooltip(
            "Output Dimensions (d):Q", title="Output Dimension (d)", format=","
        ),
        # The exponent lives in the "Output Dimension" column of df; there is
        # no "Output Power" column, so that field name would not resolve.
        alt.Tooltip("Output Dimension:Q", title="Hash Output Power"),
        alt.Tooltip("Collision %:Q", title="Collision %", format=".1f"),
    ],
)
# --- Text label layer ---
# Print the percentage inside each cell. The heatmap colour scale is yellow
# around 50 %, where white text is hard to read, so mid-range cells get black
# text and the green / red ends keep white text.
# NOTE(review): the previous predicate (>= 0.0) was always true, leaving the
# black branch unreachable; the 25 / 75 cut-offs are a legibility choice —
# confirm against the intended design.
collision_pct = alt.datum["Collision %"]
text = base.mark_text(fontSize=10, fontWeight="bold").encode(
    text="cell_text:N",
    color=alt.condition(
        (collision_pct < 25) | (collision_pct > 75),
        alt.value("white"),
        alt.value("black"),
    ),
)
# --- Combine ---
# Stack the text labels on top of the heatmap, then title and size the result.
layered = alt.layer(heatmap, text)
chart = layered.properties(
    title="Collision Probability - P ≈ 1 − exp(−n(n−1) / 2d)",
    width=550,
    height=350,
)
# Bare expression so a notebook renders the chart as the cell output.
chart