From f555cf3bcad4f4700fca7e350da37b676971024e Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Tue, 16 Jun 2026 11:58:05 +0300 Subject: [PATCH 1/4] fix: whitelist GitHub web-flow in committer identity check When a PR is rebased via GitHub web UI, the commit committer is set to web-flow (GitHub's internal bot account). The security committer identity check was incorrectly flagging this as a mismatch. - Add GITHUB_WEB_FLOW_LOGIN constant for web-flow account - Pass committer identity check when last committer is web-flow - Add test for web-flow committer scenario Closes #1119 --- .../libs/handlers/runner_handler.py | 19 +++++++++++++++++++ webhook_server/tests/test_security_checks.py | 18 ++++++++++++++++++ webhook_server/utils/constants.py | 1 + 3 files changed, 38 insertions(+) diff --git a/webhook_server/libs/handlers/runner_handler.py b/webhook_server/libs/handlers/runner_handler.py index 1a03435c4..c81dc515c 100644 --- a/webhook_server/libs/handlers/runner_handler.py +++ b/webhook_server/libs/handlers/runner_handler.py @@ -25,6 +25,7 @@ BUILD_CONTAINER_STR, CHERRY_PICKED_LABEL, CONVENTIONAL_TITLE_STR, + GITHUB_WEB_FLOW_LOGIN, PRE_COMMIT_STR, PREK_STR, PYTHON_MODULE_INSTALL_STR, @@ -404,6 +405,24 @@ async def run_security_committer_identity(self) -> None: f"PR author={parent_committer}, last committer has no GitHub user" ) await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + elif last_committer == GITHUB_WEB_FLOW_LOGIN: + self.logger.info( + f"{self.log_prefix} Last committer is '{GITHUB_WEB_FLOW_LOGIN}' " + f"(GitHub web UI operation) — passing committer identity check" + ) + output = { + "title": "Security: Committer Identity", + "summary": "Committer identity verified (GitHub web-flow)", + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n\n" + f"The last commit was made via the GitHub web UI (rebase, merge, or edit). " + f"The `web-flow` committer is GitHub's internal account for web-based operations " + f"and is trusted." + ), + } + await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) elif last_committer != parent_committer: output = { "title": "\u274c Security: Committer Identity Mismatch", diff --git a/webhook_server/tests/test_security_checks.py b/webhook_server/tests/test_security_checks.py index 5099e77e8..0a780d7ef 100644 --- a/webhook_server/tests/test_security_checks.py +++ b/webhook_server/tests/test_security_checks.py @@ -21,6 +21,7 @@ BUILTIN_CHECK_NAMES, COMMAND_SECURITY_OVERRIDE_STR, DEFAULT_SUSPICIOUS_PATHS, + GITHUB_WEB_FLOW_LOGIN, SECURITY_COMMITTER_IDENTITY_STR, SECURITY_SUSPICIOUS_PATHS_STR, ) @@ -287,6 +288,23 @@ async def test_committer_identity_unknown(self, runner_handler: RunnerHandler) - assert "could not be verified" in output["summary"] assert "no associated GitHub user" in output["text"] + @pytest.mark.asyncio + async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) -> None: + """Check passes when last committer is GitHub's web-flow bot.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_success", new=AsyncMock()) as mock_success: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_success.assert_called_once() + call_args = mock_success.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "web-flow" in call_args.kwargs["output"]["summary"] + assert GITHUB_WEB_FLOW_LOGIN in call_args.kwargs["output"]["text"] + @pytest.mark.asyncio async def test_committer_identity_check_disabled(self, runner_handler: RunnerHandler) -> None: """Check is skipped when committer-identity-check is disabled.""" diff --git a/webhook_server/utils/constants.py b/webhook_server/utils/constants.py index e1bc4aa4c..850ef37b7 100644 --- a/webhook_server/utils/constants.py +++ b/webhook_server/utils/constants.py @@ -18,6 +18,7 @@ CONVENTIONAL_TITLE_STR: str = "conventional-title" SECURITY_SUSPICIOUS_PATHS_STR: str = "security-suspicious-paths" SECURITY_COMMITTER_IDENTITY_STR: str = "security-committer-identity" +GITHUB_WEB_FLOW_LOGIN: str = "web-flow" COMMAND_SECURITY_OVERRIDE_STR: str = "security-override" WIP_STR: str = "wip" LGTM_STR: str = "lgtm" From 4b5258db60d1b78eb0862edd41e451dfc5313d69 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Tue, 16 Jun 2026 12:14:13 +0300 Subject: [PATCH 2/4] fix: verify commit author when web-flow is committer Address Qodo review findings: - Change logger.info to logger.debug for web-flow detection path - When committer is web-flow, verify commit author matches PR author - Author match: pass, author unknown: fail, author mismatch: fail - Add last_author attribute from commit.author.login - Add tests for web-flow author mismatch and unknown author scenarios --- webhook_server/libs/github_api.py | 1 + .../libs/handlers/runner_handler.py | 77 +++++++++++++++---- webhook_server/tests/test_security_checks.py | 35 +++++++++ 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index 5db4d3a9a..8ec916d80 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -664,6 +664,7 @@ async def process(self) -> Any: self.last_commit = await self._get_last_commit(pull_request=pull_request) self.parent_committer = pull_request.user.login self.last_committer = getattr(self.last_commit.committer, "login", "unknown") + self.last_author = getattr(self.last_commit.author, "login", "unknown") # Store PR SHAs: prefer webhook payload (avoids race condition with live API) # For pull_request events, base.sha and head.sha are guaranteed by GitHub webhook spec. diff --git a/webhook_server/libs/handlers/runner_handler.py b/webhook_server/libs/handlers/runner_handler.py index c81dc515c..421f4bcbd 100644 --- a/webhook_server/libs/handlers/runner_handler.py +++ b/webhook_server/libs/handlers/runner_handler.py @@ -406,23 +406,66 @@ async def run_security_committer_identity(self) -> None: ) await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) elif last_committer == GITHUB_WEB_FLOW_LOGIN: - self.logger.info( - f"{self.log_prefix} Last committer is '{GITHUB_WEB_FLOW_LOGIN}' " - f"(GitHub web UI operation) — passing committer identity check" - ) - output = { - "title": "Security: Committer Identity", - "summary": "Committer identity verified (GitHub web-flow)", - "text": ( - f"## Committer Identity Check\n\n" - f"**PR author:** `{parent_committer}`\n" - f"**Last commit committer:** `{last_committer}`\n\n" - f"The last commit was made via the GitHub web UI (rebase, merge, or edit). " - f"The `web-flow` committer is GitHub's internal account for web-based operations " - f"and is trusted." - ), - } - await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + last_author = self.github_webhook.last_author + if last_author == parent_committer: + self.logger.debug( + f"{self.log_prefix} Last committer is '{GITHUB_WEB_FLOW_LOGIN}' " + f"(GitHub web UI operation), author '{last_author}' matches PR author " + f"— passing committer identity check" + ) + output = { + "title": "Security: Committer Identity", + "summary": "Committer identity verified (GitHub web-flow)", + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** `{last_author}`\n\n" + f"The last commit was made via the GitHub web UI (rebase, merge, or edit). " + f"The `web-flow` committer is GitHub's internal account for web-based operations. " + f"The commit author matches the PR author." + ), + } + await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + elif last_author == "unknown": + self.logger.warning( + f"{self.log_prefix} Web-flow commit with unknown author: " + f"PR author={parent_committer}, committer={last_committer}, author=unknown" + ) + output = { + "title": "\u274c Security: Committer Identity Unknown (web-flow)", + "summary": "Web-flow commit author could not be verified", + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** unknown\n\n" + f"The last commit was made via the GitHub web UI, but the commit author " + f"could not be determined. Please verify the commit authorship before merging." + ), + } + await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + else: + self.logger.warning( + f"{self.log_prefix} Web-flow commit author mismatch: " + f"PR author={parent_committer}, committer={last_committer}, author={last_author}" + ) + output = { + "title": "\u274c Security: Committer Identity Mismatch (web-flow)", + "summary": ( + f"Web-flow commit author '{last_author}' differs from PR author '{parent_committer}'" + ), + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** `{last_author}`\n\n" + f"The last commit was made via the GitHub web UI by a different user than the PR author. " + f"This may indicate an unexpected web-based change to the PR branch.\n\n" + f"Please verify this is expected before merging." + ), + } + await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) elif last_committer != parent_committer: output = { "title": "\u274c Security: Committer Identity Mismatch", diff --git a/webhook_server/tests/test_security_checks.py b/webhook_server/tests/test_security_checks.py index 0a780d7ef..2023fbfde 100644 --- a/webhook_server/tests/test_security_checks.py +++ b/webhook_server/tests/test_security_checks.py @@ -293,6 +293,7 @@ async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) """Check passes when last committer is GitHub's web-flow bot.""" runner_handler.github_webhook.parent_committer = "legit-user" runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "legit-user" with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: with patch.object(runner_handler.check_run_handler, "set_check_success", new=AsyncMock()) as mock_success: @@ -305,6 +306,40 @@ async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) assert "web-flow" in call_args.kwargs["output"]["summary"] assert GITHUB_WEB_FLOW_LOGIN in call_args.kwargs["output"]["text"] + @pytest.mark.asyncio + async def test_committer_identity_web_flow_author_mismatch(self, runner_handler: RunnerHandler) -> None: + """Check fails when web-flow commit has different author than PR author.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "other-user" + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_failure.assert_called_once() + call_args = mock_failure.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "other-user" in call_args.kwargs["output"]["summary"] + + @pytest.mark.asyncio + async def test_committer_identity_web_flow_unknown_author(self, runner_handler: RunnerHandler) -> None: + """Check fails when web-flow commit has unknown author.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "unknown" + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_failure.assert_called_once() + call_args = mock_failure.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "could not be verified" in call_args.kwargs["output"]["summary"] + @pytest.mark.asyncio async def test_committer_identity_check_disabled(self, runner_handler: RunnerHandler) -> None: """Check is skipped when committer-identity-check is disabled.""" From 4e150901fbe5f3cf77c5ebf99c7779cfc37e4e77 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Tue, 16 Jun 2026 12:23:19 +0300 Subject: [PATCH 3/4] fix: wrap last_committer/last_author with github_api_call and init in __init__ Address Qodo review findings: - Wrap last_committer and last_author PyGithub property access with github_api_call() for retry/backoff and non-blocking IO - Initialize last_committer and last_author in __init__ to prevent AttributeError if accessed before process() runs --- webhook_server/libs/github_api.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index 8ec916d80..a89e662bf 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -118,6 +118,8 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self.repository_full_name: str = hook_data["repository"]["full_name"] self._bg_tasks: set[Task[Any]] = set() self.parent_committer: str = "" + self.last_committer: str = "" + self.last_author: str = "" self.pr_base_sha: str = "" self.pr_head_sha: str = "" self.x_github_delivery: str = headers.get("X-GitHub-Delivery", "") @@ -663,8 +665,16 @@ async def process(self) -> Any: self.last_commit = await self._get_last_commit(pull_request=pull_request) self.parent_committer = pull_request.user.login - self.last_committer = getattr(self.last_commit.committer, "login", "unknown") - self.last_author = getattr(self.last_commit.author, "login", "unknown") + self.last_committer = await github_api_call( + lambda: getattr(self.last_commit.committer, "login", "unknown"), + logger=self.logger, + log_prefix=self.log_prefix, + ) + self.last_author = await github_api_call( + lambda: getattr(self.last_commit.author, "login", "unknown"), + logger=self.logger, + log_prefix=self.log_prefix, + ) # Store PR SHAs: prefer webhook payload (avoids race condition with live API) # For pull_request events, base.sha and head.sha are guaranteed by GitHub webhook spec. From 8f2e5da5dc7b386353aa512388099f5cff7ed352 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Tue, 16 Jun 2026 12:32:17 +0300 Subject: [PATCH 4/4] fix: use "unknown" sentinel for last_committer/last_author defaults Change __init__ defaults from empty string to "unknown" to match the sentinel value used in the committer identity security check. Prevents false "verified" results if handler runs before process() populates the fields. --- webhook_server/libs/github_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index a89e662bf..e95367566 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -118,8 +118,8 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self.repository_full_name: str = hook_data["repository"]["full_name"] self._bg_tasks: set[Task[Any]] = set() self.parent_committer: str = "" - self.last_committer: str = "" - self.last_author: str = "" + self.last_committer: str = "unknown" + self.last_author: str = "unknown" self.pr_base_sha: str = "" self.pr_head_sha: str = "" self.x_github_delivery: str = headers.get("X-GitHub-Delivery", "")