Skip to content

Performance

Performance #3

Workflow file for this run

# Benchmarks the kmpzip binary against reference tools. Runs on manual
# dispatch, or automatically whenever a release is published.
name: Performance

# The workflow token is limited to read-only access to repository contents
# and to Actions data.
permissions:
  contents: read
  actions: read

on:
  # Manual run: every knob below has a default, so "Run workflow" with no
  # changes benchmarks the CI build on all platforms.
  workflow_dispatch:
    inputs:
      platforms:
        description: Which platforms to bench
        type: choice
        default: all
        options:
          - all
          - linux
          - macos
          - windows
      source:
        description: Where to fetch the kmpzip binary from
        type: choice
        default: ci
        options:
          - ci
          - release
      release_tag:
        description: 'Release tag (only used when source=release; "latest" picks the newest)'
        type: string
        default: latest
      sizes:
        description: |
          Comma-separated <kind>:<size> list, kind ∈ {text,rand,zero}.
          Examples: "text:50M,rand:30M" or "text:100M,rand:100M,zero:500M"
        type: string
        default: "text:50M,rand:30M,zero:50M"
      trials:
        description: Number of timed trials per measurement (min wins)
        type: string
        default: "3"
  # Automatic run: benchmark each release as soon as it is published.
  release:
    types:
      - published

jobs:
# Resolves the effective run parameters (dispatch inputs or release event)
# and builds the platform matrix consumed by the bench job.
setup:
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.gen.outputs.matrix }}
    source: ${{ steps.params.outputs.source }}
    release_tag: ${{ steps.params.outputs.release_tag }}
    sizes: ${{ steps.params.outputs.sizes }}
    trials: ${{ steps.params.outputs.trials }}
    platforms: ${{ steps.params.outputs.platforms }}
  steps:
    # Publishes source / release_tag / sizes / trials / platforms as step
    # outputs, falling back to fixed defaults when an input is empty.
    - id: params
      shell: bash
      env:
        EVENT_NAME: ${{ github.event_name }}
        EVENT_TAG: ${{ github.event.release.tag_name }}
        INPUT_SOURCE: ${{ inputs.source }}
        INPUT_TAG: ${{ inputs.release_tag }}
        INPUT_SIZES: ${{ inputs.sizes }}
        INPUT_TRIALS: ${{ inputs.trials }}
        INPUT_PLATFORMS: ${{ inputs.platforms }}
      run: |
        # A release event always benchmarks the release that was just
        # published; any other event honors the dispatch inputs.
        case "$EVENT_NAME" in
          release)
            source="release"
            release_tag="${EVENT_TAG:-latest}"
            ;;
          *)
            source="${INPUT_SOURCE:-ci}"
            release_tag="${INPUT_TAG:-latest}"
            ;;
        esac
        sizes="${INPUT_SIZES:-text:50M,rand:30M,zero:50M}"
        trials="${INPUT_TRIALS:-3}"
        platforms="${INPUT_PLATFORMS:-all}"
        printf '%s\n' \
          "source=$source" \
          "release_tag=$release_tag" \
          "sizes=$sizes" \
          "trials=$trials" \
          "platforms=$platforms" >> "$GITHUB_OUTPUT"
        echo "Resolved: source=$source tag=$release_tag platforms=$platforms"
    # Emits the job matrix as single-line JSON of the form {"include": [...]},
    # keeping every target for "all" or only the targets whose "platform"
    # equals the requested value.
    - id: gen
      shell: bash
      env:
        PLATFORMS: ${{ steps.params.outputs.platforms }}
      run: |
        targets='[
          {"os":"ubuntu-latest","artifact":"kmpzip-linux-x64","bin":"bin/kmpzip-linux-x64","platform":"linux"},
          {"os":"macos-latest","artifact":"kmpzip-macos-arm64","bin":"bin/kmpzip-macos-arm64","platform":"macos"},
          {"os":"windows-latest","artifact":"kmpzip-windows-x64.exe","bin":"bin/kmpzip-windows-x64.exe","platform":"windows"}
        ]'
        # One Python pass does both the optional filtering and the
        # {"include": ...} wrapping.
        matrix=$(printf '%s' "$targets" | python3 -c '
        import json, os, sys
        entries = json.load(sys.stdin)
        want = os.environ["PLATFORMS"]
        if want != "all":
            entries = [e for e in entries if e["platform"] == want]
        print(json.dumps({"include": entries}))
        ')
        echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
        echo "Selected matrix: $matrix"
# Runs perf/perf.py against the kmpzip binary once per matrix entry produced
# by the setup job, then uploads the text and JSON results as an artifact.
# Matrix values reach the shell scripts through `env:` rather than being
# spliced into the script text, matching how every other value is passed.
bench:
  needs: setup
  strategy:
    # Let the other platforms finish even if one of them fails.
    fail-fast: false
    matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    # Downloads the platform binary into bin/, either from a GitHub release
    # or from the newest successful run of the "CI" workflow on main.
    - name: Fetch kmpzip from ${{ needs.setup.outputs.source }}
      shell: bash
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        SOURCE: ${{ needs.setup.outputs.source }}
        RELEASE_TAG: ${{ needs.setup.outputs.release_tag }}
        ARTIFACT: ${{ matrix.artifact }}
        KMPZIP_BIN: ${{ matrix.bin }}
      run: |
        set -e
        mkdir -p bin
        if [ "$SOURCE" = "release" ]; then
          tag="$RELEASE_TAG"
          # "latest" (or an empty tag) is resolved to a concrete tag name.
          if [ "$tag" = "latest" ] || [ -z "$tag" ]; then
            tag=$(gh release view --json tagName --jq .tagName)
          fi
          echo "Downloading $ARTIFACT from release $tag"
          gh release download "$tag" --pattern "$ARTIFACT" --dir bin/
        else
          run_id=$(gh run list --workflow CI --branch main --status success \
            --limit 1 --json databaseId --jq '.[0].databaseId')
          if [ -z "$run_id" ]; then
            echo "ERROR: no successful CI run found on main" >&2
            exit 1
          fi
          echo "Downloading $ARTIFACT from CI run $run_id"
          gh run download "$run_id" --name "$ARTIFACT" --dir bin/
          # CI uploads the artifact as a directory named $ARTIFACT containing the binary;
          # download-artifact v7 with archive=false flattens it. gh run download flattens too.
          ls -la bin/
        fi
        # chmod is best-effort; the ls that follows fails the step if the
        # binary is missing.
        chmod +x "$KMPZIP_BIN" 2>/dev/null || true
        ls -la "$KMPZIP_BIN"
    # Smoke-run only: the exit status of `help` is deliberately ignored.
    - name: Verify binary
      shell: bash
      env:
        KMPZIP_BIN: ${{ matrix.bin }}
      run: |
        "./$KMPZIP_BIN" help || true
    # GNU gzip alongside the BSD /usr/bin/gzip
    - name: Install reference tools (macOS)
      if: matrix.platform == 'macos'
      shell: bash
      run: brew install gzip
    - name: Install reference tools (Windows)
      if: matrix.platform == 'windows'
      shell: bash
      run: choco install zip unzip -y --no-progress
    # Builds the --gzip/--zip arguments for perf.py and exposes them as the
    # single-line step output `args`.
    - name: Compose tool arguments
      id: tools
      shell: bash
      env:
        BENCH_PLATFORM: ${{ matrix.platform }}
      run: |
        set -e
        # Note: --zip takes only the zip binary path; perf.py infers the
        # sibling unzip. Joining zip_path:unzip_path on Windows breaks
        # because Windows paths contain ':' (e.g. C:/...).
        case "$BENCH_PLATFORM" in
          linux)
            gzips=( "--gzip" "gnu=$(command -v gzip)" )
            zips=( "--zip" "system=$(command -v zip)" )
            ;;
          macos)
            gzips=( "--gzip" "apple=/usr/bin/gzip" "--gzip" "gnu=/opt/homebrew/bin/gzip" )
            zips=( "--zip" "system=/usr/bin/zip" )
            ;;
          windows)
            gzips=( "--gzip" "gnu=$(command -v gzip)" )
            zip_path=$(command -v zip || true)
            if [ -n "$zip_path" ]; then
              zips=( "--zip" "system=$zip_path" )
            else
              # choco may have failed to install zip; perf.py will still
              # bench kmpzip alone for the zip family.
              echo "WARN: 'zip' not on PATH; skipping system zip tool"
              zips=()
            fi
            ;;
          *)
            # Without this branch an unrecognised platform would run the
            # benchmark with no reference tools and no error.
            echo "ERROR: unsupported platform: $BENCH_PLATFORM" >&2
            exit 1
            ;;
        esac
        # Encode as a single line; bash split happens in the bench step
        printf 'args=%s\n' "${gzips[*]} ${zips[*]}" >> "$GITHUB_OUTPUT"
        echo "Composed: ${gzips[*]} ${zips[*]}"
        # Sanity-check binaries exist (warning only; does not fail the step)
        for arg in "${gzips[@]}" "${zips[@]}"; do
          case "$arg" in
            --gzip|--zip) continue ;;
          esac
          # Strip the "label=" prefix to get the tool path.
          p="${arg#*=}"
          if ! command -v "$p" >/dev/null 2>&1 && [ ! -x "$p" ]; then
            echo "WARN: tool not found or not executable: $p"
          fi
        done
    # Writes perf-<platform>.json via --json-out and mirrors stdout into
    # perf-<platform>.txt.
    - name: Run benchmark
      shell: bash
      env:
        SIZES: ${{ needs.setup.outputs.sizes }}
        TRIALS: ${{ needs.setup.outputs.trials }}
        KMPZIP: ${{ matrix.bin }}
        TOOL_ARGS: ${{ steps.tools.outputs.args }}
        BENCH_PLATFORM: ${{ matrix.platform }}
      run: |
        mkdir -p perf-results
        # shellcheck disable=SC2086  # word-splitting on TOOL_ARGS is intentional
        python perf/perf.py \
          --kmpzip "$KMPZIP" \
          $TOOL_ARGS \
          --workdir perf-work \
          --sizes "$SIZES" \
          --trials "$TRIALS" \
          --json-out "perf-results/perf-$BENCH_PLATFORM.json" \
          | tee "perf-results/perf-$BENCH_PLATFORM.txt"
    # Runs even after a failed benchmark so partial output is still kept.
    - name: Upload results
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: perf-${{ matrix.platform }}
        path: perf-results/
        if-no-files-found: error
# Collects every perf-* artifact and renders the text reports into the run's
# step summary. Runs even when setup or bench failed, so partial results are
# still reported.
summary:
  needs: [setup, bench]
  if: always()
  runs-on: ubuntu-latest
  steps:
    # merge-multiple is off; the render step expects each artifact in its own
    # sub-directory under results/ (it globs results/*/perf-*.txt).
    - uses: actions/download-artifact@v6
      with:
        path: results/
        pattern: perf-*
        merge-multiple: false
    - name: Render summary
      shell: bash
      env:
        SOURCE: ${{ needs.setup.outputs.source }}
        RELEASE_TAG: ${{ needs.setup.outputs.release_tag }}
        SIZES: ${{ needs.setup.outputs.sizes }}
        TRIALS: ${{ needs.setup.outputs.trials }}
        PLATFORMS: ${{ needs.setup.outputs.platforms }}
      run: |
        {
          echo "# kmpzip vs gzip — perf summary"
          echo
          if [ "$SOURCE" = "release" ]; then
            echo "Source: \`release\` (tag: \`$RELEASE_TAG\`)"
          else
            echo "Source: \`$SOURCE\`"
          fi
          echo "Sizes: \`$SIZES\` · trials: $TRIALS · platforms: $PLATFORMS"
          echo
          rendered=0
          for f in results/*/perf-*.txt; do
            # An unmatched glob stays literal; skip anything that is not a file.
            [ -f "$f" ] || continue
            rendered=$((rendered + 1))
            echo "## $(basename "$f" .txt)"
            echo '```'
            cat "$f"
            echo '```'
            echo
          done
          # This job runs under always(), so there may be nothing to show;
          # say so instead of leaving a header-only summary.
          if [ "$rendered" -eq 0 ]; then
            echo "_No benchmark results were found; check the bench jobs for failures._"
          fi
        } >> "$GITHUB_STEP_SUMMARY"