Skip to content
30 changes: 29 additions & 1 deletion handwritten/storage/internal-tooling/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,32 @@ For each invocation of the benchmark, write a new object of random size between
| ElapsedTimeUs | the elapsed time in microseconds the operation took |
| Status | completion state of the operation [OK, FAIL] |
| AppBufferSize | N/A |
| CpuTimeUs | N/A |
| CpuTimeUs | N/A |

---

## Comparative Latency & Memory Benchmarking (`benchmark.ts`)

This benchmark compares the current codebase build against a specified baseline npm version of `@google-cloud/storage` (e.g., comparing the Gaxios migration against baseline `7.19.0`). It measures latency statistics (average, minimum, and maximum) for upload, metadata lookup, and download scenarios, and logs heap memory usage as each scenario runs.

### Run Example:

1. **Compile the codebase:**
```bash
cd handwritten/storage
npm run compile
```

2. **Execute the benchmark comparison:**
```bash
node build/esm/internal-tooling/benchmark.js --project <YOUR_PROJECT_ID> --bucket <YOUR_BUCKET_NAME> --iterations 100 --baseline 7.19.0
```

### CLI Parameters:

| Parameter | Description | Requirement | Default |
| --------- | ----------- | :---: | :---: |
| `--project` | Google Cloud Project ID | **Required** | - |
| `--bucket` | Cloud Storage Bucket Name to upload/download files | **Required** | - |
| `--iterations` | Number of iterations for each workload scenario | Optional | `100` |
| `--baseline` | Stable baseline npm version of `@google-cloud/storage` to compare against; when omitted, only the current build is benchmarked | Optional | - |
178 changes: 178 additions & 0 deletions handwritten/storage/internal-tooling/benchmark.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*!
* Copyright 2026 Google LLC. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import {Storage, File} from '../src/index.js';
import {performance} from 'perf_hooks';
import * as path from 'path';
import * as fs from 'fs';
import {execSync} from 'child_process';
import * as os from 'os';
import {pathToFileURL} from 'url';
import yargs from 'yargs';

/** Size, in bytes, of the payload written by every upload iteration (1KB). */
const FILE_SIZE_BYTES = 1024;

/** Command-line arguments accepted by the benchmark. */
type Args = {
  project: string;
  bucket: string;
  iterations: number;
  baseline?: string;
};

// Everything after `node <script>` is handed to yargs for parsing.
const cliTokens = process.argv.slice(2);

/** Parsed command-line arguments; `project` and `bucket` are mandatory. */
const argv = yargs(cliTokens)
  .options({
    project: {
      type: 'string',
      demandOption: true,
      description: 'Google Cloud Project ID',
    },
    bucket: {
      type: 'string',
      demandOption: true,
      description: 'Cloud Storage Bucket Name',
    },
    iterations: {
      type: 'number',
      default: 100,
      description: 'Number of iterations for each test',
    },
    baseline: {
      type: 'string',
      description: 'Baseline version of @google-cloud/storage to compare against (e.g., 7.19.0)',
    },
  })
  .parseSync() as unknown as Args;

/**
 * Temporary directory holding the installed baseline package, if one was
 * created. Assigned by `loadBaseline` and removed in `main`'s `finally`.
 */
let tempDirToDelete: string | undefined;

/**
 * Installs a published version of `@google-cloud/storage` into a fresh
 * temporary directory and dynamically imports its `Storage` class.
 *
 * The temporary directory is recorded in `tempDirToDelete` before anything is
 * installed, so it can be removed later even if the install or import fails.
 *
 * @param version Exact version to install, e.g. `7.19.0` or `8.0.0-rc.1`.
 * @returns The `Storage` constructor exposed by the installed package.
 * @throws Error if `version` is not a plain semver string, if `npm install`
 *   fails, or if the installed package exposes no usable `Storage` export.
 */
async function loadBaseline(version: string) {
  // Strict allow-list: `version` is interpolated into a shell command below,
  // so anything other than digits, dots and a simple pre-release tag is rejected.
  const semverRegex = /^\d+\.\d+\.\d+(-[a-zA-Z0-9.]+)?$/;
  if (!semverRegex.test(version)) {
    throw new Error(`Invalid baseline version format: "${version}". Must be a valid semver string (e.g. 7.19.0).`);
  }

  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'storage-benchmark-'));
  tempDirToDelete = tempDir; // Track for cleanup

  console.log(`Installing baseline version ${version} in ${tempDir}...`);
  // A minimal package.json makes npm install into tempDir rather than a parent.
  fs.writeFileSync(path.join(tempDir, 'package.json'), JSON.stringify({name: 'bench-temp'}));
  execSync(`npm install @google-cloud/storage@${version} --silent`, {cwd: tempDir});
  const baselinePath = path.join(tempDir, 'node_modules', '@google-cloud/storage');

  const pkgJson = JSON.parse(fs.readFileSync(path.join(baselinePath, 'package.json'), 'utf8'));
  const main = pkgJson.main || './build/src/index.js';
  const entry = path.join(baselinePath, main);

  console.log(`Loading baseline from ${entry}`);
  // The ESM loader only accepts absolute specifiers as URLs; a raw Windows
  // path such as `C:\...` is rejected, so convert it to a file:// URL first.
  const pkg = await import(pathToFileURL(entry).href);

  // Depending on how the package is built, the class is a named export or
  // hangs off the default export.
  const BaselineStorage = pkg.Storage || pkg.default?.Storage || pkg.default;
  if (typeof BaselineStorage !== 'function') {
    throw new Error(`Baseline package @google-cloud/storage@${version} does not expose a Storage class (loaded from ${entry}).`);
  }
  return BaselineStorage;
}

/**
 * Runs the upload, metadata and download scenarios against one build of the
 * storage client and prints latency statistics plus heap usage for each.
 *
 * Every scenario runs `argv.iterations` times. Objects created by the upload
 * scenario are deleted in a `finally` block, so they are removed even when a
 * scenario throws; deletions that fail are counted and reported instead of
 * being silently dropped.
 *
 * @param StorageClass Storage constructor to benchmark (current build or baseline).
 * @param name Label used in log output and in the uploaded object names.
 * @param bucketName Bucket the benchmark objects are written to and read from.
 * @throws Error if any storage operation in a scenario fails, or if no object
 *   was uploaded for the metadata and download scenarios to use.
 */
async function runBenchmark(StorageClass: typeof Storage, name: string, bucketName: string) {
  const storage = new StorageClass({projectId: argv.project});
  const bucket = storage.bucket(bucketName);
  const content = Buffer.alloc(FILE_SIZE_BYTES, 'a');
  const uploadedFiles: File[] = [];

  console.log(`\n=== Running benchmark for ${name} ===`);
  const logMemory = (prefix: string) => {
    const mem = process.memoryUsage();
    console.log(`${prefix} - Heap Used: ${(mem.heapUsed / 1024 / 1024).toFixed(2)} MB / Heap Total: ${(mem.heapTotal / 1024 / 1024).toFixed(2)} MB`);
  };

  try {
    // Scenario 1: Upload Small File
    console.log('Starting Scenario 1: Upload (1KB)...');
    const uploadTimes: number[] = [];
    for (let i = 0; i < argv.iterations; i++) {
      if (i % 10 === 0) logMemory(` Upload iteration ${i}`);
      const iterFilename = `bench-${name}-${Date.now()}-${i}.bin`;
      const iterFile = bucket.file(iterFilename);
      // Only the save call itself is timed; object setup stays outside.
      const start = performance.now();
      await iterFile.save(content);
      uploadTimes.push(performance.now() - start);
      uploadedFiles.push(iterFile);
    }
    reportResults('Upload (1KB)', uploadTimes, true);
    logMemory('After Upload');

    // The read scenarios reuse the first uploaded object.
    const mainFile = uploadedFiles[0];
    if (!mainFile) {
      throw new Error('No files were uploaded; cannot run the metadata and download scenarios.');
    }

    // Scenario 2: Get Metadata
    console.log('Starting Scenario 2: Get Metadata...');
    const metadataTimes: number[] = [];
    for (let i = 0; i < argv.iterations; i++) {
      if (i % 10 === 0) logMemory(` Metadata iteration ${i}`);
      const start = performance.now();
      await mainFile.getMetadata();
      metadataTimes.push(performance.now() - start);
    }
    reportResults('Get Metadata', metadataTimes);
    logMemory('After Metadata');

    // Scenario 3: Download Small File
    console.log('Starting Scenario 3: Download (1KB)...');
    const downloadTimes: number[] = [];
    for (let i = 0; i < argv.iterations; i++) {
      if (i % 10 === 0) logMemory(` Download iteration ${i}`);
      const start = performance.now();
      await mainFile.download();
      downloadTimes.push(performance.now() - start);
    }
    reportResults('Download (1KB)', downloadTimes, true);
    logMemory('After Download');
  } finally {
    // Best-effort cleanup: a failed delete must not mask the benchmark result,
    // but leaked objects should still be visible to whoever ran the tool.
    console.log('Cleaning up cloud files...');
    let failedDeletes = 0;
    await Promise.all(
      uploadedFiles.map(f =>
        f.delete().catch(() => {
          failedDeletes++;
        })
      )
    );
    if (failedDeletes > 0) {
      console.warn(`Failed to delete ${failedDeletes} of ${uploadedFiles.length} uploaded file(s) from bucket ${bucketName}.`);
    }
    logMemory('After Cleanup');
  }
}

/**
 * Prints latency statistics for one benchmark scenario.
 *
 * @param operation Label printed as the heading of the report.
 * @param times Latency samples in milliseconds, one per iteration.
 * @param includeThroughput When true, also prints an approximate throughput
 *   derived from the average latency, assuming a 1KB payload per operation.
 */
function reportResults(operation: string, times: number[], includeThroughput = false): void {
  console.log(`\n${operation}:`);
  console.log(` Iterations: ${times.length}`);

  // With no samples there is nothing to average; avoid printing
  // Infinity/NaN placeholders that look like real measurements.
  if (times.length === 0) {
    console.log(' No samples recorded.');
    return;
  }

  // Single pass instead of Math.min(...times) / Math.max(...times): spreading
  // a large array as call arguments can exceed the engine's argument limit.
  let min = Infinity;
  let max = -Infinity;
  let total = 0;
  for (const sample of times) {
    if (sample < min) min = sample;
    if (sample > max) max = sample;
    total += sample;
  }
  const avg = total / times.length;

  console.log(` Average Latency: ${avg.toFixed(2)} ms`);
  console.log(` Min Latency: ${min.toFixed(2)} ms`);
  console.log(` Max Latency: ${max.toFixed(2)} ms`);
  if (includeThroughput) {
    const throughput = 1000 / avg; // KB/s (assuming 1KB payload)
    console.log(` Approx. Throughput: ${throughput.toFixed(2)} KB/s`);
  }
}

/**
 * Entry point: benchmarks the current build and, when `--baseline` is given,
 * the requested published version as well.
 *
 * Any failure is logged and turned into a non-zero exit code so the script can
 * be used from CI. The temporary directory created for the baseline install
 * is removed on every exit path.
 */
async function main() {
  try {
    // `NaN < 1` is false, so a plain range check would let a non-numeric
    // `--iterations` value through and every scenario would run zero times.
    if (!Number.isInteger(argv.iterations) || argv.iterations < 1) {
      throw new Error('Iterations parameter must be an integer greater than or equal to 1');
    }

    // Run for local version
    await runBenchmark(Storage, 'Current (Gaxios)', argv.bucket);

    // Run for baseline if specified
    if (argv.baseline) {
      const BaselineStorage = await loadBaseline(argv.baseline);
      await runBenchmark(BaselineStorage, `Baseline (${argv.baseline})`, argv.bucket);
    }
  } catch (error) {
    console.error('Error running benchmark:', error);
    // Set the exit code instead of calling process.exit() so the cleanup in
    // the finally block below still runs.
    process.exitCode = 1;
  } finally {
    // Remove the baseline install directory, if one was created.
    if (tempDirToDelete) {
      console.log(`Cleaning up local temporary directory: ${tempDirToDelete}`);
      try {
        fs.rmSync(tempDirToDelete, {recursive: true, force: true});
      } catch (cleanupErr) {
        console.error('Failed to clean up local temporary directory:', cleanupErr);
      }
    }
  }
}

main();
Loading