diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index e95367566..457e23d4e 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -119,7 +119,7 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self._bg_tasks: set[Task[Any]] = set() self.parent_committer: str = "" self.last_committer: str = "unknown" - self.last_author: str = "unknown" + self.last_committer_id: int = 0 self.pr_base_sha: str = "" self.pr_head_sha: str = "" self.x_github_delivery: str = headers.get("X-GitHub-Delivery", "") @@ -670,11 +670,14 @@ async def process(self) -> Any: logger=self.logger, log_prefix=self.log_prefix, ) - self.last_author = await github_api_call( - lambda: getattr(self.last_commit.author, "login", "unknown"), + self.last_committer_id = await github_api_call( + lambda: getattr(self.last_commit.committer, "id", 0), logger=self.logger, log_prefix=self.log_prefix, ) + self.logger.debug( + f"{self.log_prefix} Last commit: committer='{self.last_committer}' (ID: {self.last_committer_id})" + ) # Store PR SHAs: prefer webhook payload (avoids race condition with live API) # For pull_request events, base.sha and head.sha are guaranteed by GitHub webhook spec. diff --git a/webhook_server/libs/handlers/runner_handler.py b/webhook_server/libs/handlers/runner_handler.py index 421f4bcbd..c8d49e83f 100644 --- a/webhook_server/libs/handlers/runner_handler.py +++ b/webhook_server/libs/handlers/runner_handler.py @@ -26,6 +26,7 @@ CHERRY_PICKED_LABEL, CONVENTIONAL_TITLE_STR, GITHUB_WEB_FLOW_LOGIN, + GITHUB_WEB_FLOW_USER_ID, PRE_COMMIT_STR, PREK_STR, PYTHON_MODULE_INSTALL_STR, @@ -406,12 +407,11 @@ async def run_security_committer_identity(self) -> None: ) await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) elif last_committer == GITHUB_WEB_FLOW_LOGIN: - last_author = self.github_webhook.last_author - if last_author == parent_committer: + last_committer_id = self.github_webhook.last_committer_id + if last_committer_id == GITHUB_WEB_FLOW_USER_ID: self.logger.debug( - f"{self.log_prefix} Last committer is '{GITHUB_WEB_FLOW_LOGIN}' " - f"(GitHub web UI operation), author '{last_author}' matches PR author " - f"— passing committer identity check" + f"{self.log_prefix} Last committer is GitHub's web-flow " + f"(user ID {last_committer_id}) — passing committer identity check" ) output = { "title": "Security: Committer Identity", @@ -419,50 +419,30 @@ async def run_security_committer_identity(self) -> None: "text": ( f"## Committer Identity Check\n\n" f"**PR author:** `{parent_committer}`\n" - f"**Last commit committer:** `{last_committer}`\n" - f"**Last commit author:** `{last_author}`\n\n" + f"**Last commit committer:** `{last_committer}` (ID: {last_committer_id})\n\n" f"The last commit was made via the GitHub web UI (rebase, merge, or edit). " - f"The `web-flow` committer is GitHub's internal account for web-based operations. " - f"The commit author matches the PR author." + f"The committer is GitHub's verified `web-flow` system account " + f"(user ID {GITHUB_WEB_FLOW_USER_ID}), confirming this is a legitimate " + f"GitHub web operation." ), } await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) - elif last_author == "unknown": - self.logger.warning( - f"{self.log_prefix} Web-flow commit with unknown author: " - f"PR author={parent_committer}, committer={last_committer}, author=unknown" - ) - output = { - "title": "\u274c Security: Committer Identity Unknown (web-flow)", - "summary": "Web-flow commit author could not be verified", - "text": ( - f"## Committer Identity Check\n\n" - f"**PR author:** `{parent_committer}`\n" - f"**Last commit committer:** `{last_committer}`\n" - f"**Last commit author:** unknown\n\n" - f"The last commit was made via the GitHub web UI, but the commit author " - f"could not be determined. Please verify the commit authorship before merging." - ), - } - await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) else: self.logger.warning( - f"{self.log_prefix} Web-flow commit author mismatch: " - f"PR author={parent_committer}, committer={last_committer}, author={last_author}" + f"{self.log_prefix} Committer login is 'web-flow' but user ID " + f"{last_committer_id} does not match GitHub's web-flow ID " + f"{GITHUB_WEB_FLOW_USER_ID} — possible impersonation" ) output = { - "title": "\u274c Security: Committer Identity Mismatch (web-flow)", - "summary": ( - f"Web-flow commit author '{last_author}' differs from PR author '{parent_committer}'" - ), + "title": "\u274c Security: Committer Identity Suspicious", + "summary": f"Committer claims to be web-flow but has unexpected user ID {last_committer_id}", "text": ( f"## Committer Identity Check\n\n" f"**PR author:** `{parent_committer}`\n" - f"**Last commit committer:** `{last_committer}`\n" - f"**Last commit author:** `{last_author}`\n\n" - f"The last commit was made via the GitHub web UI by a different user than the PR author. " - f"This may indicate an unexpected web-based change to the PR branch.\n\n" - f"Please verify this is expected before merging." + f"**Last commit committer:** `{last_committer}` (ID: {last_committer_id})\n" + f"**Expected web-flow ID:** {GITHUB_WEB_FLOW_USER_ID}\n\n" + f"The committer login is `web-flow` but the user ID does not match " + f"GitHub's official web-flow account. This may indicate an impersonation attempt." ), } await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output) diff --git a/webhook_server/tests/test_security_checks.py b/webhook_server/tests/test_security_checks.py index 2023fbfde..fcdd866f0 100644 --- a/webhook_server/tests/test_security_checks.py +++ b/webhook_server/tests/test_security_checks.py @@ -22,6 +22,7 @@ COMMAND_SECURITY_OVERRIDE_STR, DEFAULT_SUSPICIOUS_PATHS, GITHUB_WEB_FLOW_LOGIN, + GITHUB_WEB_FLOW_USER_ID, SECURITY_COMMITTER_IDENTITY_STR, SECURITY_SUSPICIOUS_PATHS_STR, ) @@ -290,10 +291,10 @@ async def test_committer_identity_unknown(self, runner_handler: RunnerHandler) - @pytest.mark.asyncio async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) -> None: - """Check passes when last committer is GitHub's web-flow bot.""" + """Check passes when last committer is GitHub's verified web-flow account.""" runner_handler.github_webhook.parent_committer = "legit-user" runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN - runner_handler.github_webhook.last_author = "legit-user" + runner_handler.github_webhook.last_committer_id = GITHUB_WEB_FLOW_USER_ID with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: with patch.object(runner_handler.check_run_handler, "set_check_success", new=AsyncMock()) as mock_success: @@ -304,14 +305,14 @@ async def test_committer_identity_web_flow(self, runner_handler: RunnerHandler) call_args = mock_success.call_args assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR assert "web-flow" in call_args.kwargs["output"]["summary"] - assert GITHUB_WEB_FLOW_LOGIN in call_args.kwargs["output"]["text"] + assert str(GITHUB_WEB_FLOW_USER_ID) in call_args.kwargs["output"]["text"] @pytest.mark.asyncio - async def test_committer_identity_web_flow_author_mismatch(self, runner_handler: RunnerHandler) -> None: - """Check fails when web-flow commit has different author than PR author.""" + async def test_committer_identity_web_flow_fake_id(self, runner_handler: RunnerHandler) -> None: + """Check fails when committer claims to be web-flow but has wrong user ID.""" runner_handler.github_webhook.parent_committer = "legit-user" runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN - runner_handler.github_webhook.last_author = "other-user" + runner_handler.github_webhook.last_committer_id = 99999999 with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: @@ -321,24 +322,7 @@ async def test_committer_identity_web_flow_author_mismatch(self, runner_handler: mock_failure.assert_called_once() call_args = mock_failure.call_args assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR - assert "other-user" in call_args.kwargs["output"]["summary"] - - @pytest.mark.asyncio - async def test_committer_identity_web_flow_unknown_author(self, runner_handler: RunnerHandler) -> None: - """Check fails when web-flow commit has unknown author.""" - runner_handler.github_webhook.parent_committer = "legit-user" - runner_handler.github_webhook.last_committer = GITHUB_WEB_FLOW_LOGIN - runner_handler.github_webhook.last_author = "unknown" - - with patch.object(runner_handler.check_run_handler, "set_check_in_progress", new=AsyncMock()) as mock_progress: - with patch.object(runner_handler.check_run_handler, "set_check_failure", new=AsyncMock()) as mock_failure: - await runner_handler.run_security_committer_identity() - - mock_progress.assert_called_once_with(name=SECURITY_COMMITTER_IDENTITY_STR) - mock_failure.assert_called_once() - call_args = mock_failure.call_args - assert call_args.kwargs["name"] == SECURITY_COMMITTER_IDENTITY_STR - assert "could not be verified" in call_args.kwargs["output"]["summary"] + assert "99999999" in call_args.kwargs["output"]["summary"] @pytest.mark.asyncio async def test_committer_identity_check_disabled(self, runner_handler: RunnerHandler) -> None: diff --git a/webhook_server/utils/constants.py b/webhook_server/utils/constants.py index 850ef37b7..6e4c5111f 100644 --- a/webhook_server/utils/constants.py +++ b/webhook_server/utils/constants.py @@ -19,6 +19,7 @@ SECURITY_SUSPICIOUS_PATHS_STR: str = "security-suspicious-paths" SECURITY_COMMITTER_IDENTITY_STR: str = "security-committer-identity" GITHUB_WEB_FLOW_LOGIN: str = "web-flow" +GITHUB_WEB_FLOW_USER_ID: int = 19864447 # GitHub's permanent system account for web UI operations (immutable ID) COMMAND_SECURITY_OVERRIDE_STR: str = "security-override" WIP_STR: str = "wip" LGTM_STR: str = "lgtm"