diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index 5db4d3a9a..e95367566 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -118,6 +118,8 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self.repository_full_name: str = hook_data["repository"]["full_name"] self._bg_tasks: set[Task[Any]] = set() self.parent_committer: str = "" + self.last_committer: str = "unknown" + self.last_author: str = "unknown" self.pr_base_sha: str = "" self.pr_head_sha: str = "" self.x_github_delivery: str = headers.get("X-GitHub-Delivery", "") @@ -663,7 +665,16 @@ async def process(self) -> Any: self.last_commit = await self._get_last_commit(pull_request=pull_request) self.parent_committer = pull_request.user.login - self.last_committer = getattr(self.last_commit.committer, "login", "unknown") + self.last_committer = await github_api_call( + lambda: getattr(self.last_commit.committer, "login", "unknown"), + logger=self.logger, + log_prefix=self.log_prefix, + ) + self.last_author = await github_api_call( + lambda: getattr(self.last_commit.author, "login", "unknown"), + logger=self.logger, + log_prefix=self.log_prefix, + ) # Store PR SHAs: prefer webhook payload (avoids race condition with live API) # For pull_request events, base.sha and head.sha are guaranteed by GitHub webhook spec. diff --git a/webhook_server/libs/handlers/runner_handler.py b/webhook_server/libs/handlers/runner_handler.py index 1a03435c4..421f4bcbd 100644 --- a/webhook_server/libs/handlers/runner_handler.py +++ b/webhook_server/libs/handlers/runner_handler.py @@ -25,6 +25,7 @@ BUILD_CONTAINER_STR, CHERRY_PICKED_LABEL, CONVENTIONAL_TITLE_STR, + GITHUB_WEB_FLOW_LOGIN, PRE_COMMIT_STR, PREK_STR, PYTHON_MODULE_INSTALL_STR, @@ -404,6 +405,67 @@ async def run_security_committer_identity(self) -> None: f"PR author={parent_committer}, last committer has no GitHub user" ) await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + elif last_committer == GITHUB_WEB_FLOW_LOGIN: + last_author = self.github_webhook.last_author + if last_author == parent_committer: + self.logger.debug( + f"{self.log_prefix} Last committer is '{GITHUB_WEB_FLOW_LOGIN}' " + f"(GitHub web UI operation), author '{last_author}' matches PR author " + f"— passing committer identity check" + ) + output = { + "title": "Security: Committer Identity", + "summary": "Committer identity verified (GitHub web-flow)", + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** `{last_author}`\n\n" + f"The last commit was made via the GitHub web UI (rebase, merge, or edit). " + f"The `web-flow` committer is GitHub's internal account for web-based operations. " + f"The commit author matches the PR author." + ), + } + await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + elif last_author == "unknown": + self.logger.warning( + f"{self.log_prefix} Web-flow commit with unknown author: " + f"PR author={parent_committer}, committer={last_committer}, author=unknown" + ) + output = { + "title": "\u274c Security: Committer Identity Unknown (web-flow)", + "summary": "Web-flow commit author could not be verified", + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** unknown\n\n" + f"The last commit was made via the GitHub web UI, but the commit author " + f"could not be determined. Please verify the commit authorship before merging." + ), + } + await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) + else: + self.logger.warning( + f"{self.log_prefix} Web-flow commit author mismatch: " + f"PR author={parent_committer}, committer={last_committer}, author={last_author}" + ) + output = { + "title": "\u274c Security: Committer Identity Mismatch (web-flow)", + "summary": ( + f"Web-flow commit author '{last_author}' differs from PR author '{parent_committer}'" + ), + "text": ( + f"## Committer Identity Check\n\n" + f"**PR author:** `{parent_committer}`\n" + f"**Last commit committer:** `{last_committer}`\n" + f"**Last commit author:** `{last_author}`\n\n" + f"The last commit was made via the GitHub web UI by a different user than the PR author. " + f"This may indicate an unexpected web-based change to the PR branch.\n\n" + f"Please verify this is expected before merging." + ), + } + await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) elif last_committer != parent_committer: output = { "title": "\u274c Security: Committer Identity Mismatch", diff --git a/webhook_server/tests/test_security_checks.py b/webhook_server/tests/test_security_checks.py index 5099e77e8..2023fbfde 100644 --- a/webhook_server/tests/test_security_checks.py +++ b/webhook_server/tests/test_security_checks.py @@ -21,6 +21,7 @@ BUILTIN_CHECK_NAMES, COMMAND_SECURITY_OVERRIDE_STR, DEFAULT_SUSPICIOUS_PATHS, + GITHUB_WEB_FLOW_LOGIN, SECURITY_COMMITTER_IDENTITY_STR, SECURITY_SUSPICIOUS_PATHS_STR, ) @@ -287,6 +288,58 @@ async def test_committer_identity_unknown(self, runner_handler: RunnerHandler) - assert "could not be verified" in output["summary"] assert "no associated GitHub user" in output["text"] + @pytest.mark.asyncio + async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) -> None: + """Check passes when last committer is GitHub's web-flow bot.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "legit-user" + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_success", new=AsyncMock()) as mock_success: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_success.assert_called_once() + call_args = mock_success.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "web-flow" in call_args.kwargs["output"]["summary"] + assert GITHUB_WEB_FLOW_LOGIN in call_args.kwargs["output"]["text"] + + @pytest.mark.asyncio + async def test_committer_identity_web_flow_author_mismatch(self, runner_handler: RunnerHandler) -> None: + """Check fails when web-flow commit has different author than PR author.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "other-user" + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_failure.assert_called_once() + call_args = mock_failure.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "other-user" in call_args.kwargs["output"]["summary"] + + @pytest.mark.asyncio + async def test_committer_identity_web_flow_unknown_author(self, runner_handler: RunnerHandler) -> None: + """Check fails when web-flow commit has unknown author.""" + runner_handler.github_webhook.parent_committer = "legit-user" + runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN + runner_handler.github_webhook.last_author = "unknown" + + with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: + with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: + await runner_handler.run_security_committer_identity() + + mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) + mock_failure.assert_called_once() + call_args = mock_failure.call_args + assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR + assert "could not be verified" in call_args.kwargs["output"]["summary"] + @pytest.mark.asyncio async def test_committer_identity_check_disabled(self, runner_handler: RunnerHandler) -> None: """Check is skipped when committer-identity-check is disabled.""" diff --git a/webhook_server/utils/constants.py b/webhook_server/utils/constants.py index e1bc4aa4c..850ef37b7 100644 --- a/webhook_server/utils/constants.py +++ b/webhook_server/utils/constants.py @@ -18,6 +18,7 @@ CONVENTIONAL_TITLE_STR: str = "conventional-title" SECURITY_SUSPICIOUS_PATHS_STR: str = "security-suspicious-paths" SECURITY_COMMITTER_IDENTITY_STR: str = "security-committer-identity" +GITHUB_WEB_FLOW_LOGIN: str = "web-flow" COMMAND_SECURITY_OVERRIDE_STR: str = "security-override" WIP_STR: str = "wip" LGTM_STR: str = "lgtm"