From eff19984cf97dc253d1cb749b6428d39e5576061 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Mon, 27 Apr 2026 17:16:35 -0400 Subject: [PATCH] feat(github): expand PR review context --- inc/Abilities/GitHubAbilities.php | 217 ++++++++++++++++++++++- inc/Handlers/GitHub/GitHub.php | 14 +- inc/Handlers/GitHub/GitHubSettings.php | 41 +++++ tests/smoke-github-pr-review-context.php | 97 ++++++++++ 4 files changed, 363 insertions(+), 6 deletions(-) diff --git a/inc/Abilities/GitHubAbilities.php b/inc/Abilities/GitHubAbilities.php index c0ef887..791c7ce 100644 --- a/inc/Abilities/GitHubAbilities.php +++ b/inc/Abilities/GitHubAbilities.php @@ -726,8 +726,9 @@ public static function getPullReviewContext( array $input ): array|\WP_Error { $pull['pull'], $files, array( - 'head_sha' => sanitize_text_field( $input['head_sha'] ?? '' ), - 'max_patch_chars' => (int) ( $input['max_patch_chars'] ?? 200000 ), + 'head_sha' => sanitize_text_field( $input['head_sha'] ?? '' ), + 'max_patch_chars' => (int) ( $input['max_patch_chars'] ?? 200000 ), + 'expanded_context' => self::buildPullReviewExpandedContext( $repo, $pull['pull'], $files, $input ), ) ); @@ -741,6 +742,214 @@ public static function getPullReviewContext( array $input ): array|\WP_Error { ); } + /** + * Build optional full-file context for PR review packets. + * + * @param string $repo Repository in owner/repo format. + * @param array $pull Normalized pull request payload. + * @param array $files Normalized changed file payloads. + * @param array $options Expansion options. + * @param callable|null $fetcher Optional test seam: fn(string $path, string $ref, string $side): array|WP_Error. + * @return array|null Expanded context object, or null when no expansion was requested. + */ + public static function buildPullReviewExpandedContext( string $repo, array $pull, array $files, array $options = array(), ?callable $fetcher = null ): ?array { + $include_file_contents = ! empty( $options['include_file_contents'] ); + $include_base_contents = ! empty( $options['include_base_contents'] ); + $context_paths = self::normalizeContextPaths( $options['context_paths'] ?? array() ); + + if ( ! $include_file_contents && empty( $context_paths ) ) { + return null; + } + + $limits = array( + 'max_file_content_chars' => max( 1, (int) ( $options['max_file_content_chars'] ?? 20000 ) ), + 'max_context_files' => max( 1, (int) ( $options['max_context_files'] ?? 10 ) ), + 'max_total_context_chars' => max( 1, (int) ( $options['max_total_context_chars'] ?? 100000 ) ), + ); + + $head_ref = (string) ( $pull['head_sha'] ?? $pull['head_ref'] ?? $pull['head'] ?? '' ); + $base_ref = (string) ( $pull['base_sha'] ?? $pull['base_ref'] ?? $pull['base'] ?? '' ); + $fetcher = $fetcher ?? static function ( string $path, string $ref, string $_side ) use ( $repo ): array|\WP_Error { + return self::getFileContents( + array( + 'repo' => $repo, + 'path' => $path, + 'branch' => $ref, + ) + ); + }; + + $expanded = array( + 'changed_files' => array(), + 'extra_files' => array(), + 'skipped' => array(), + 'limits' => $limits, + 'summary' => array( + 'included_files' => 0, + 'included_chars' => 0, + 'skipped_files' => 0, + 'truncated' => false, + ), + ); + + $remaining_files = $limits['max_context_files']; + $remaining_chars = $limits['max_total_context_chars']; + + if ( $include_file_contents ) { + foreach ( $files as $file ) { + $path = (string) ( $file['filename'] ?? '' ); + if ( '' === $path ) { + continue; + } + + if ( $remaining_files <= 0 || $remaining_chars <= 0 ) { + self::recordExpandedContextSkip( $expanded, $path, 'changed_file', 'limit_exceeded' ); + continue; + } + + $entry = array( + 'path' => $path, + ); + + if ( '' !== $head_ref ) { + $entry['head'] = self::fetchBoundedContextFile( $fetcher, $path, $head_ref, 'head', $limits['max_file_content_chars'], $remaining_chars ); + self::applyContextFileAccounting( $expanded, $entry['head'], $remaining_chars ); + } + + if ( $include_base_contents && '' !== $base_ref && $remaining_chars > 0 ) { + $entry['base'] = self::fetchBoundedContextFile( $fetcher, $path, $base_ref, 'base', $limits['max_file_content_chars'], $remaining_chars ); + self::applyContextFileAccounting( $expanded, $entry['base'], $remaining_chars ); + } + + $expanded['changed_files'][] = $entry; + $remaining_files--; + } + } + + foreach ( $context_paths as $path ) { + if ( $remaining_files <= 0 || $remaining_chars <= 0 ) { + self::recordExpandedContextSkip( $expanded, $path, 'context_path', 'limit_exceeded' ); + continue; + } + + $file_context = self::fetchBoundedContextFile( $fetcher, $path, $head_ref, 'head', $limits['max_file_content_chars'], $remaining_chars ); + self::applyContextFileAccounting( $expanded, $file_context, $remaining_chars ); + $expanded['extra_files'][] = array( + 'path' => $path, + 'head' => $file_context, + ); + $remaining_files--; + } + + $expanded['summary']['included_files'] = count( $expanded['changed_files'] ) + count( $expanded['extra_files'] ); + return $expanded; + } + + /** + * Normalize context paths from array or comma/newline separated string input. + */ + private static function normalizeContextPaths( mixed $paths ): array { + if ( is_string( $paths ) ) { + $paths = preg_split( '/[\r\n,]+/', $paths ) ?: array(); + } + + if ( ! is_array( $paths ) ) { + return array(); + } + + $normalized = array(); + foreach ( $paths as $path ) { + $path = trim( (string) $path ); + $path = ltrim( $path, '/' ); + if ( '' === $path || str_contains( $path, '..' ) ) { + continue; + } + $normalized[ $path ] = true; + } + + return array_keys( $normalized ); + } + + /** + * Fetch and bound one expanded-context file response. + */ + private static function fetchBoundedContextFile( callable $fetcher, string $path, string $ref, string $side, int $max_file_chars, int $remaining_chars ): array { + if ( '' === $ref ) { + return array( + 'ref' => $ref, + 'included' => false, + 'reason' => 'missing_ref', + ); + } + + if ( $remaining_chars <= 0 ) { + return array( + 'ref' => $ref, + 'included' => false, + 'reason' => 'total_limit_exceeded', + ); + } + + $result = $fetcher( $path, $ref, $side ); + if ( is_wp_error( $result ) ) { + return array( + 'ref' => $ref, + 'included' => false, + 'reason' => $result->get_error_code(), + 'error' => $result->get_error_message(), + ); + } + + $file = $result['file'] ?? $result; + $content = (string) ( $file['content'] ?? '' ); + $original = strlen( $content ); + $limit = min( $max_file_chars, $remaining_chars ); + $included = substr( $content, 0, $limit ); + $chars = strlen( $included ); + + return array( + 'ref' => $ref, + 'sha' => $file['sha'] ?? '', + 'size' => (int) ( $file['size'] ?? $original ), + 'html_url' => $file['html_url'] ?? '', + 'content' => $included, + 'included' => true, + 'included_chars' => $chars, + 'original_chars' => $original, + 'truncated' => $chars < $original, + ); + } + + /** + * Apply char accounting for one expanded-context file side. + */ + private static function applyContextFileAccounting( array &$expanded, array $file_context, int &$remaining_chars ): void { + if ( empty( $file_context['included'] ) ) { + $expanded['summary']['skipped_files']++; + return; + } + + $included_chars = (int) ( $file_context['included_chars'] ?? 0 ); + $expanded['summary']['included_chars'] += $included_chars; + $remaining_chars = max( 0, $remaining_chars - $included_chars ); + + if ( ! empty( $file_context['truncated'] ) ) { + $expanded['summary']['truncated'] = true; + } + } + + /** + * Record a file skipped before any API fetch was attempted. + */ + private static function recordExpandedContextSkip( array &$expanded, string $path, string $source, string $reason ): void { + $expanded['skipped'][] = array( + 'path' => $path, + 'source' => $source, + 'reason' => $reason, + ); + $expanded['summary']['skipped_files']++; + } + /** * Get a single pull request. * @@ -1246,6 +1455,10 @@ public static function normalizePullReviewContext( string $repo, array $pull, ar ), ); + if ( isset( $options['expanded_context'] ) && is_array( $options['expanded_context'] ) ) { + $context['expanded_context'] = $options['expanded_context']; + } + return array( 'title' => $title, 'content' => wp_json_encode( $context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ), diff --git a/inc/Handlers/GitHub/GitHub.php b/inc/Handlers/GitHub/GitHub.php index 3f6edf0..fca1b50 100644 --- a/inc/Handlers/GitHub/GitHub.php +++ b/inc/Handlers/GitHub/GitHub.php @@ -100,10 +100,16 @@ private function fetchPullReviewContext( array $config, ExecutionContext $contex } $result = GitHubAbilities::getPullReviewContext( array( - 'repo' => $repo, - 'pull_number' => $pull_number, - 'head_sha' => $config['head_sha'] ?? '', - 'max_patch_chars' => $config['max_patch_chars'] ?? 200000, + 'repo' => $repo, + 'pull_number' => $pull_number, + 'head_sha' => $config['head_sha'] ?? '', + 'max_patch_chars' => $config['max_patch_chars'] ?? 200000, + 'include_file_contents' => ! empty( $config['include_file_contents'] ), + 'include_base_contents' => ! empty( $config['include_base_contents'] ), + 'context_paths' => $config['context_paths'] ?? array(), + 'max_file_content_chars' => $config['max_file_content_chars'] ?? 20000, + 'max_context_files' => $config['max_context_files'] ?? 10, + 'max_total_context_chars' => $config['max_total_context_chars'] ?? 100000, ) ); if ( is_wp_error( $result ) ) { diff --git a/inc/Handlers/GitHub/GitHubSettings.php b/inc/Handlers/GitHub/GitHubSettings.php index db9e7f9..dc73772 100644 --- a/inc/Handlers/GitHub/GitHubSettings.php +++ b/inc/Handlers/GitHub/GitHubSettings.php @@ -63,6 +63,47 @@ public static function get_fields(): array { 'required' => false, 'default' => 200000, ), + 'include_file_contents' => array( + 'type' => 'checkbox', + 'label' => __( 'Include Changed File Contents', 'data-machine-code' ), + 'description' => __( 'Opt in to bounded full-file contents for changed files in PR review context.', 'data-machine-code' ), + 'required' => false, + 'default' => false, + ), + 'include_base_contents' => array( + 'type' => 'checkbox', + 'label' => __( 'Include Base File Contents', 'data-machine-code' ), + 'description' => __( 'When changed file contents are enabled, also include bounded base-branch contents for comparison.', 'data-machine-code' ), + 'required' => false, + 'default' => false, + ), + 'context_paths' => array( + 'type' => 'textarea', + 'label' => __( 'Additional Context Paths', 'data-machine-code' ), + 'description' => __( 'Optional comma- or newline-separated repository paths to include from the PR head ref.', 'data-machine-code' ), + 'required' => false, + ), + 'max_file_content_chars' => array( + 'type' => 'number', + 'label' => __( 'Max File Content Characters', 'data-machine-code' ), + 'description' => __( 'Maximum characters included per expanded file content block.', 'data-machine-code' ), + 'required' => false, + 'default' => 20000, + ), + 'max_context_files' => array( + 'type' => 'number', + 'label' => __( 'Max Context Files', 'data-machine-code' ), + 'description' => __( 'Maximum number of files included in expanded PR review context.', 'data-machine-code' ), + 'required' => false, + 'default' => 10, + ), + 'max_total_context_chars' => array( + 'type' => 'number', + 'label' => __( 'Max Total Context Characters', 'data-machine-code' ), + 'description' => __( 'Maximum cumulative characters included across all expanded PR review context files.', 'data-machine-code' ), + 'required' => false, + 'default' => 100000, + ), 'state' => array( 'type' => 'select', 'label' => __( 'State Filter', 'data-machine-code' ), diff --git a/tests/smoke-github-pr-review-context.php b/tests/smoke-github-pr-review-context.php index aab767a..8476449 100644 --- a/tests/smoke-github-pr-review-context.php +++ b/tests/smoke-github-pr-review-context.php @@ -149,8 +149,105 @@ function wp_json_encode( $data, int $options = 0 ) { return json_encode( $data, $assert( 40 === $content['truncation']['max_patch_chars'], 'truncation records configured limit' ); $assert( 1 === $content['truncation']['truncated_files'], 'truncation counts omitted patches' ); $assert( true === $content['truncation']['truncated'], 'truncation boolean is true when a patch is omitted' ); + $assert( ! isset( $content['expanded_context'] ), 'default output omits expanded_context' ); $assert( $content === $packet['metadata']['review_context'], 'metadata carries same review context for downstream consumers' ); + $fetcher = function ( string $path, string $ref ): array|\WP_Error { + $fixtures = array( + 'abc123head:inc/Handlers/GitHub/GitHub.php' => 'head changed file content', + 'def456base:inc/Handlers/GitHub/GitHub.php' => 'base changed file content', + 'abc123head:docs/context.md' => 'extra context file content', + 'abc123head:docs/one.md' => 'one', + 'abc123head:docs/two.md' => 'two', + ); + + $key = $ref . ':' . $path; + if ( ! isset( $fixtures[ $key ] ) ) { + return new \WP_Error( 'github_not_found', 'Fixture not found.' ); + } + + return array( + 'file' => array( + 'path' => $path, + 'sha' => substr( sha1( $key ), 0, 12 ), + 'size' => strlen( $fixtures[ $key ] ), + 'content' => $fixtures[ $key ], + 'html_url' => 'https://github.test/' . $path, + ), + ); + }; + + $expanded = \DataMachineCode\Abilities\GitHubAbilities::buildPullReviewExpandedContext( + 'Extra-Chill/data-machine-code', + $pull, + array_slice( $files, 0, 1 ), + array( + 'include_file_contents' => true, + 'include_base_contents' => true, + 'context_paths' => array( 'docs/context.md' ), + 'max_file_content_chars' => 10, + 'max_context_files' => 3, + 'max_total_context_chars' => 50, + ), + $fetcher + ); + + $assert( is_array( $expanded ), 'expanded context is built when expansion is requested' ); + $assert( 1 === count( $expanded['changed_files'] ), 'expanded context separates changed-file contents' ); + $assert( 1 === count( $expanded['extra_files'] ), 'expanded context separates explicit context paths' ); + $assert( 'head chang' === $expanded['changed_files'][0]['head']['content'], 'changed-file head content is included and per-file bounded' ); + $assert( 'base chang' === $expanded['changed_files'][0]['base']['content'], 'base content is included only when requested' ); + $assert( true === $expanded['changed_files'][0]['head']['truncated'], 'per-file truncation is explicit' ); + $assert( 'docs/context.md' === $expanded['extra_files'][0]['path'], 'explicit context path is included' ); + $assert( 30 === $expanded['summary']['included_chars'], 'expanded context tracks included character total' ); + + $total_limited = \DataMachineCode\Abilities\GitHubAbilities::buildPullReviewExpandedContext( + 'Extra-Chill/data-machine-code', + $pull, + array_slice( $files, 0, 1 ), + array( + 'include_file_contents' => true, + 'context_paths' => array( 'docs/context.md' ), + 'max_file_content_chars' => 20, + 'max_context_files' => 2, + 'max_total_context_chars' => 25, + ), + $fetcher + ); + + $assert( 25 === $total_limited['summary']['included_chars'], 'total context limit is enforced' ); + $assert( 5 === $total_limited['extra_files'][0]['head']['included_chars'], 'remaining total budget bounds later files' ); + $assert( true === $total_limited['extra_files'][0]['head']['truncated'], 'total-limit truncation is explicit' ); + + $file_limited = \DataMachineCode\Abilities\GitHubAbilities::buildPullReviewExpandedContext( + 'Extra-Chill/data-machine-code', + $pull, + array(), + array( + 'context_paths' => array( 'docs/one.md', 'docs/two.md' ), + 'max_context_files' => 1, + 'max_total_context_chars' => 100, + ), + $fetcher + ); + + $assert( 1 === count( $file_limited['extra_files'] ), 'context path inclusion respects file-count limit' ); + $assert( 1 === count( $file_limited['skipped'] ), 'file-count limit records skipped files' ); + $assert( 'limit_exceeded' === $file_limited['skipped'][0]['reason'], 'skipped file includes machine-readable reason' ); + + $packet_with_expansion = \DataMachineCode\Abilities\GitHubAbilities::normalizePullReviewContext( + 'Extra-Chill/data-machine-code', + $pull, + $files, + array( + 'expanded_context' => $expanded, + ) + ); + $expanded_content = json_decode( $packet_with_expansion['content'], true ); + $assert( isset( $expanded_content['expanded_context'] ), 'review packet carries expanded_context when provided' ); + $assert( isset( $expanded_content['changed_files'] ), 'review packet keeps patch changed_files at the top level' ); + $assert( isset( $expanded_content['expanded_context']['changed_files'][0]['head']['content'] ), 'expanded changed-file content stays under expanded_context' ); + $mismatch = \DataMachineCode\Abilities\GitHubAbilities::normalizePullReviewContext( 'Extra-Chill/data-machine-code', $pull,