Benchmarks

This page records a small reproducible aperture-sum benchmark. Timings are machine-specific; see Performance for API-selection and parallel-threshold guidance.

The repository benchmark script validates numerical agreement before it prints timings. This page runs the same script with --repeats 5 and shows a focused subset: float32 exact aperture sums for circles, ellipses, rectangles, and pills at 1, 100, and 10,000 positions.

The benchmark labels are:

aap: the normal object API;
aapr: the optimized astroapers path for that row, usually a direct Rust-backed kernel;
pg: photutils.geometry, low-level Photutils overlap kernels where available;
pa: photutils.Aperture, Photutils aperture objects;
sep: SEP aperture sums where available.

from io import StringIO
from pathlib import Path
import subprocess
import sys

import pandas as pd


# Locate benchmarks/benchmark_apertures.py by checking the current working
# directory first and then each of its ancestors, nearest first.
search_roots = (Path.cwd(), *Path.cwd().parents)
candidate_scripts = (
    root / "benchmarks" / "benchmark_apertures.py" for root in search_roots
)
benchmark_script = next(
    (candidate for candidate in candidate_scripts if candidate.exists()),
    None,
)
if benchmark_script is None:
    # No directory on the way up to the filesystem root contains the script.
    raise FileNotFoundError("could not find benchmarks/benchmark_apertures.py")

# Command line for the benchmark script, run with the same interpreter that is
# executing this page. Each option is grouped with its values; the unpacked
# tuples produce one flat argument list.
benchmark_cmd = [
    sys.executable,
    str(benchmark_script),
    *("--repeats", "5"),
    *("--format", "csv"),
    *("--tasks", "apsum"),
    *("--dtypes", "float32"),
    *("--shapes", "circle", "ellipse", "rectangle", "pill"),
    *("--counts", "1", "100", "10000"),
]

# Run the benchmark script in a child process and keep its stdout as text.
# check=True turns a non-zero exit status into CalledProcessError.
try:
    benchmark_output = subprocess.run(
        benchmark_cmd,
        check=True,
        capture_output=True,
        text=True,
    ).stdout
except subprocess.CalledProcessError as error:
    # capture_output=True keeps the child's stderr off the console, so a
    # failed run would otherwise report only its exit status. Echo the
    # captured stderr before re-raising the original exception unchanged.
    if error.stderr:
        sys.stderr.write(error.stderr)
    raise

# Keep only the CSV payload: drop blank lines and lines starting with "#".
nonblank_lines = filter(None, benchmark_output.splitlines())
benchmark_lines = [text for text in nonblank_lines if text[0] != "#"]

# Drop every line that begins with this header prefix so that only the first
# header row reaches the parser.
# NOTE(review): presumably this is the header of a second table printed by the
# script -- confirm against the script's CSV output.
extra_header_prefix = "task,shape,dtype,n_apertures,fastest_library"
csv_rows = [
    text for text in benchmark_lines
    if not text.startswith(extra_header_prefix)
]
benchmark_csv = "\n".join(csv_rows)

# Parse the remaining rows and keep those with no "speedup_vs_library" value.
parsed_rows = pd.read_csv(StringIO(benchmark_csv))
lacks_speedup = parsed_rows["speedup_vs_library"].isna()
benchmark_raw = parsed_rows.loc[lacks_speedup].copy()

# Shorten the Photutils library names to the labels used on this page.
short_library_labels = {
    "photutils.geometry": "pg",
    "photutils.Aperture": "pa",
}
benchmark_raw["library"] = benchmark_raw["library"].replace(short_library_labels)

# Reshape to one row per (shape, n_apertures) with one "seconds" column per
# library label. aggfunc="first" keeps the first value when a combination
# occurs more than once.
seconds_by_library = pd.pivot_table(
    benchmark_raw,
    values="seconds",
    index=["shape", "n_apertures"],
    columns="library",
    aggfunc="first",
)
# Turn the shape / n_apertures index levels back into ordinary columns.
benchmark_summary = seconds_by_library.reset_index()

# Library labels that actually produced a timing column in the pivot.
present_labels = [
    label
    for label in ("aapr", "aap", "pg", "pa", "sep")
    if label in benchmark_summary
]

# Add "<label>_us": the timing converted from seconds to microseconds.
for label in present_labels:
    benchmark_summary[f"{label}_us"] = benchmark_summary[label].mul(1.0e6)

# Add "<label>/aapr": each other backend's time divided by the aapr time.
# The aapr column is looked up only when a ratio is actually computed.
for label in present_labels:
    if label == "aapr":
        continue
    benchmark_summary[f"{label}/aapr"] = benchmark_summary[label].div(
        benchmark_summary["aapr"]
    )

# Columns shown on the page, in display order. Any column that was not
# produced (for example because a backend has no timings) is skipped.
display_columns = [
    "shape",
    "n_apertures",
    "aapr_us",
    "aap/aapr",
    "pg/aapr",
    "pa/aapr",
    "sep/aapr",
]
display_columns = [column for column in display_columns if column in benchmark_summary]
benchmark_table = benchmark_summary[display_columns].copy()
# The pivot leaves "library" as the name of the columns axis, which renders
# as a stray header label above the row index. Clear it on the display copy.
benchmark_table.columns.name = None

# Render numbers as strings: microseconds with one decimal, ratios as "N.Nx".
# Missing values become "--" in both kinds of column, so a missing timing is
# never printed as the literal text "nan".
for column in benchmark_table.columns:
    if column.endswith("_us"):
        benchmark_table[column] = benchmark_table[column].map(
            lambda value: "--" if pd.isna(value) else f"{value:.1f}"
        )
    elif column.endswith("/aapr"):
        benchmark_table[column] = benchmark_table[column].map(
            lambda value: "--" if pd.isna(value) else f"{value:.1f}x"
        )

# Final bare expression so the table is displayed as the cell output.
benchmark_table

library	shape	n_apertures	aapr_us	aap/aapr	pg/aapr	pa/aapr	sep/aapr
0	circle	1	30.5	1.5x	1.6x	5.6x	1.0x
1	circle	100	176.5	1.1x	8.6x	14.4x	1.7x
2	circle	10000	8230.8	1.0x	16.7x	28.1x	2.8x
3	ellipse	1	44.8	1.4x	2.3x	6.7x	1.1x
4	ellipse	100	472.2	1.0x	9.8x	13.2x	1.8x
5	ellipse	10000	27343.5	1.0x	16.3x	21.5x	2.8x
6	pill	1	108.2	1.5x	--	--	--
7	pill	100	2220.7	1.1x	--	--	--
8	pill	10000	207949.3	1.0x	--	--	--
9	rectangle	1	40.4	1.4x	--	10.4x	--
10	rectangle	100	197.9	1.1x	--	88.3x	--
11	rectangle	10000	8799.1	1.1x	--	195.9x	--

The exact numbers are hardware-dependent, but the ratios are the useful part. The aapr_us column is the aapr time in microseconds, and each ratio column is that backend's time divided by the aapr time, so values larger than 1x mean that backend was slower than aapr; -- marks a backend with no timing for that row. The script interleaves timing order, validates results before reporting times, and drops one fastest and one slowest sample when at least three samples are available. For very large Photutils rows the script may reduce the effective Photutils repeat count to keep the benchmark from dominating docs build time; this is reported in the benchmark script notes.