Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ security-checks:
- ".github/workflows/"
- ".github/actions/"
committer-identity-check: true
trusted-committers: # Additional trusted committers (GitHub App bot, web-flow, and API users are auto-included)
- "pre-commit-ci[bot]"

repositories:
my-repository:
Expand Down
10 changes: 10 additions & 0 deletions webhook_server/config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ $defs:
When enabled, compares the PR author (parent committer) against the last commit's
committer. Fails the security-committer-identity check run if they differ.
default: true
trusted-committers:
type: array
description: |
List of committer logins that are always trusted for the committer-identity check.
When the last committer matches an entry in this list, the check passes regardless
of the PR author. Useful for bots (e.g., pre-commit-ci[bot]) and org identities.
Note: The GitHub App bot, web-flow, and github-tokens API users are
automatically included — you only need to list additional external committers here.
items:
type: string
Comment thread
rnetser marked this conversation as resolved.
additionalProperties: false
type: object
properties:
Expand Down
2 changes: 2 additions & 0 deletions webhook_server/docs/pr-flow-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,14 @@ The PR Flow API integrates seamlessly with the log viewer system to provide:
```python
import httpx


async def analyze_pr_workflow(hook_id: str, base_url: str = "http://192.168.10.44:5003") -> dict:
async with httpx.AsyncClient() as client:
# Example using production webhook server
response = await client.get(f"{base_url}/logs/api/pr-flow/{hook_id}")
return response.json()


# Usage
workflow_data = await analyze_pr_workflow("f4b3c2d1-a9b8-4c5d-9e8f-1a2b3c4d5e6f")
print(f"Workflow health: {workflow_data['performance_metrics']['workflow_health']}")
Expand Down
83 changes: 62 additions & 21 deletions webhook_server/libs/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
CONFIGURABLE_LABEL_CATEGORIES,
CONVENTIONAL_TITLE_STR,
DEFAULT_SUSPICIOUS_PATHS,
GITHUB_WEB_FLOW_LOGIN,
OTHER_MAIN_BRANCH,
PRE_COMMIT_STR,
PYTHON_MODULE_INSTALL_STR,
Expand Down Expand Up @@ -504,7 +505,7 @@ async def _recheck_merge_eligibility(self, pull_request: PullRequest) -> None:

async def process(self) -> Any:
# Early exit for pull_request_review_thread events that don't need processing.
# Must run BEFORE add_api_users_to_auto_verified_and_merged_users() to avoid
# Must run BEFORE get_api_users() to avoid
# burning rate limit on get_user() calls for skipped events.
if self.github_event == "pull_request_review_thread":
action = self.hook_data["action"]
Expand All @@ -526,7 +527,7 @@ async def process(self) -> Any:

# Early exit for status events with pending state — only terminal states
# (success, failure, error) need processing.
# Must run BEFORE add_api_users_to_auto_verified_and_merged_users() to avoid
# Must run BEFORE get_api_users() to avoid
# burning rate limit on get_user() calls for skipped events.
if self.github_event == "status":
state = self.hook_data["state"]
Expand All @@ -542,7 +543,8 @@ async def process(self) -> Any:
return None

# Initialize auto-verified users from API users (async operation)
await self.add_api_users_to_auto_verified_and_merged_users()
api_users = await self.get_api_users()
self.auto_verified_and_merged_users.extend(user for user in api_users if user is not None)

# Initialize app bot login for bot-PR identification (async)
if not self.app_bot_login:
Expand All @@ -569,6 +571,9 @@ async def process(self) -> Any:
else:
self.logger.debug(f"{self.log_prefix} No GitHub App API available — app_bot_login not set")

# Build final trusted-committers list with dynamic identities
await self._build_trusted_committers(api_users=api_users)

event_log: str = f"Event type: {self.github_event}. event ID: {self.x_github_delivery}"

# Start webhook routing context step
Expand Down Expand Up @@ -668,7 +673,7 @@ async def process(self) -> Any:
self.last_commit = await self._get_last_commit(pull_request=pull_request)
self.parent_committer = pull_request.user.login
self.last_committer = await github_api_call(
lambda: getattr(self.last_commit.committer, "login", "unknown"),
lambda: getattr(self.last_commit.committer, "login", "unknown") or "unknown",
logger=self.logger,
log_prefix=self.log_prefix,
)
Expand Down Expand Up @@ -835,27 +840,18 @@ async def process(self) -> Any:
await self._update_context_metrics()
return None

async def add_api_users_to_auto_verified_and_merged_users(self) -> None:
async def get_api_users(self) -> list[str | None]:
apis_and_tokens = get_apis_and_tokes_from_config(config=self.config)

async def check_token(api: github.Github, token: str) -> str | None:
"""Check a single API token and return the user login if valid, None otherwise."""
token_suffix = f"...{token[-4:]}" if token else "unknown"
try:
rate_limit_remaining = await github_api_call(
lambda: api.rate_limiting[-1], logger=self.logger, log_prefix=self.log_prefix
)
except Exception as ex:
self.logger.warning(
f"{self.log_prefix} Failed to get API rate limit for token ending in '{token_suffix}', "
f"skipping. {ex}"
)
return None

if rate_limit_remaining == 60:
self.logger.warning(
f"{self.log_prefix} API has rate limit set to 60 which indicates an invalid token "
f"(token ending in '{token_suffix}'), skipping"
# Pre-flight probe: verify token is functional before attempting get_user()
await github_api_call(lambda: api.rate_limiting[-1], logger=self.logger, log_prefix=self.log_prefix)
except Exception:
self.logger.exception(
f"{self.log_prefix} Failed to get API rate limit for token ending in '{token_suffix}', skipping"
)
Comment thread
rnetser marked this conversation as resolved.
return None

Expand All @@ -871,8 +867,7 @@ async def check_token(api: github.Github, token: str) -> str | None:

return _api_user

results = await asyncio.gather(*[check_token(api, token) for api, token in apis_and_tokens])
self.auto_verified_and_merged_users.extend(user for user in results if user is not None)
return await asyncio.gather(*[check_token(api, token) for api, token in apis_and_tokens])

def prepare_log_prefix(self, pull_request: PullRequest | None = None) -> str:
return prepare_log_prefix(
Expand Down Expand Up @@ -997,6 +992,19 @@ def _repo_data_from_config(self, repository_config: dict[str, Any]) -> None:
_mandatory_raw = True
self.security_mandatory: bool = _mandatory_raw

_trusted_committers = _security_config.get("trusted-committers", [])
if not isinstance(_trusted_committers, list):
self.logger.warning(
f"{self.log_prefix} security-checks.trusted-committers must be array, "
f"got {type(_trusted_committers).__name__}. Defaulting to empty list."
)
_trusted_committers = []
self.security_trusted_committers: list[str] = [
str(entry).strip().lower()
for entry in _trusted_committers
if isinstance(entry, (str, int, float)) and not isinstance(entry, bool) and str(entry).strip()
]

_auto_merge_prs = self.config.get_value(
value="set-auto-merge-prs", return_on_none=[], extra_dict=repository_config
)
Expand Down Expand Up @@ -1110,6 +1118,39 @@ def _sanitize_item(item: Any) -> str:

self.mask_sensitive = self.config.get_value("mask-sensitive-data", return_on_none=True)

async def _build_trusted_committers(self, api_users: list[str | None] | None = None) -> None:
"""Add dynamic entries to trusted-committers list.

Adds:
- GitHub App bot login (app_bot_login)
- GitHub's web-flow system account
- API user logins from github-tokens

Static config entries are already loaded in _repo_data_from_config().
Called once from process() after app_bot_login is initialized.
"""
# GitHub App bot
if self.app_bot_login:
bot = str(self.app_bot_login).strip().lower()
if bot and bot not in self.security_trusted_committers:
self.security_trusted_committers.append(bot)

# GitHub's web-flow system account
web_flow = GITHUB_WEB_FLOW_LOGIN.lower()
if web_flow not in self.security_trusted_committers:
self.security_trusted_committers.append(web_flow)

# API users from github-tokens
if api_users is None:
api_users = await self.get_api_users()
for user in api_users:
if user:
normalized = str(user).strip().lower()
if normalized and normalized not in self.security_trusted_committers:
self.security_trusted_committers.append(normalized)

self.logger.debug(f"{self.log_prefix} Trusted committers: {self.security_trusted_committers}")

async def get_pull_request(self, number: int | None = None) -> PullRequest | None:
if number:
self.logger.debug(f"{self.log_prefix} Attempting to get PR by number: {number}")
Expand Down
108 changes: 60 additions & 48 deletions webhook_server/libs/handlers/runner_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,8 @@ async def run_security_suspicious_paths(self) -> None:
async def run_security_committer_identity(self) -> None:
"""Check if the last committer matches the PR author.

Fails the check run if the last commit's committer differs from the PR author
(parent_committer), which may indicate a commit was authored by someone unexpected.
Uses a unified trusted-committers list that includes static config entries,
the GitHub App bot login, web-flow, and API users from github-tokens.
"""
if not self.github_webhook.security_committer_identity_check:
self.logger.debug(f"{self.log_prefix} Committer identity check disabled, skipping")
Expand All @@ -385,15 +385,17 @@ async def run_security_committer_identity(self) -> None:
parent_committer = self.github_webhook.parent_committer
last_committer = self.github_webhook.last_committer

# SECURITY: "unknown" check MUST precede the trusted-committers check.
# An unverifiable committer identity should always fail.
if last_committer == "unknown":
output: CheckRunOutput = {
"title": "\u274c Security: Committer Identity Unknown",
"title": " Security: Committer Identity Unknown",
"summary": "Committer identity could not be verified",
"text": (
"## Committer Identity Check\n\n"
f"**PR author:** `{parent_committer}`\n"
"**Last commit committer:** unknown\n\n"
"Committer identity could not be verified \u2014 "
"Committer identity could not be verified "
"last commit has no associated GitHub user.\n\n"
"This may indicate:\n"
"- A commit was made with a local Git identity not linked to a GitHub account\n"
Expand All @@ -406,68 +408,78 @@ async def run_security_committer_identity(self) -> None:
f"PR author={parent_committer}, last committer has no GitHub user"
)
await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output)
elif last_committer == GITHUB_WEB_FLOW_LOGIN:
last_committer_id = self.github_webhook.last_committer_id
if last_committer_id == GITHUB_WEB_FLOW_USER_ID:
self.logger.debug(
f"{self.log_prefix} Last committer is GitHub's web-flow "
f"(user ID {last_committer_id}) — passing committer identity check"

elif last_committer.lower() != parent_committer.lower():
if last_committer.lower() in self.github_webhook.security_trusted_committers:
# Extra guard: verify web-flow by immutable user ID to prevent impersonation
if last_committer.lower() == GITHUB_WEB_FLOW_LOGIN:
last_committer_id = self.github_webhook.last_committer_id
if last_committer_id != GITHUB_WEB_FLOW_USER_ID:
self.logger.warning(
f"{self.log_prefix} Committer login is 'web-flow' but user ID "
f"{last_committer_id} does not match GitHub's web-flow ID "
f"{GITHUB_WEB_FLOW_USER_ID} — possible impersonation"
)
output = {
"title": "❌ Security: Committer Identity Suspicious",
"summary": (
f"Committer claims to be web-flow but has unexpected user ID {last_committer_id}"
),
"text": (
f"## Committer Identity Check\n\n"
f"**PR author:** `{parent_committer}`\n"
f"**Last commit committer:** `{last_committer}` (ID: {last_committer_id})\n"
f"**Expected web-flow ID:** {GITHUB_WEB_FLOW_USER_ID}\n\n"
f"The committer login is `web-flow` but the user ID does not match "
f"GitHub's official web-flow account. This may indicate an impersonation attempt."
),
}
await self.check_run_handler.set_check_failure(
name=SECURITY_COMMITTER_IDENTITY_STR, output=output
)
return
Comment thread
rnetser marked this conversation as resolved.

# Trusted committer — pass
self.logger.info(
f"{self.log_prefix} Committer identity: '{last_committer}' is in unified trusted list"
)
output = {
"title": "Security: Committer Identity",
"summary": "Committer identity verified (GitHub web-flow)",
"summary": f"Committer '{last_committer}' is trusted",
"text": (
f"## Committer Identity Check\n\n"
f"**PR author:** `{parent_committer}`\n"
f"**Last commit committer:** `{last_committer}` (ID: {last_committer_id})\n\n"
f"The last commit was made via the GitHub web UI (rebase, merge, or edit). "
f"The committer is GitHub's verified `web-flow` system account "
f"(user ID {GITHUB_WEB_FLOW_USER_ID}), confirming this is a legitimate "
f"GitHub web operation."
f"**Last commit committer:** `{last_committer}`\n\n"
f"The committer differs from the PR author but is in the trusted committers list.\n"
f"This is expected for automated workflows (bots, CI tools, org identities, "
f"GitHub web operations)."
),
}
await self.check_run_handler.set_check_success(name=SECURITY_COMMITTER_IDENTITY_STR, output=output)
else:
self.logger.warning(
f"{self.log_prefix} Committer login is 'web-flow' but user ID "
f"{last_committer_id} does not match GitHub's web-flow ID "
f"{GITHUB_WEB_FLOW_USER_ID} — possible impersonation"
)
# Untrusted mismatch — fail
output = {
"title": "\u274c Security: Committer Identity Suspicious",
"summary": f"Committer claims to be web-flow but has unexpected user ID {last_committer_id}",
"title": " Security: Committer Identity Mismatch",
"summary": f"Last committer '{last_committer}' differs from PR author '{parent_committer}'",
"text": (
f"## Committer Identity Check\n\n"
f"**PR author:** `{parent_committer}`\n"
f"**Last commit committer:** `{last_committer}` (ID: {last_committer_id})\n"
f"**Expected web-flow ID:** {GITHUB_WEB_FLOW_USER_ID}\n\n"
f"The committer login is `web-flow` but the user ID does not match "
f"GitHub's official web-flow account. This may indicate an impersonation attempt."
f"**Last commit committer:** `{last_committer}`\n\n"
f"The last commit in this PR was made by a different user than the PR author. "
f"This may indicate:\n"
f"- An unauthorized commit was pushed to the PR branch\n"
f"- A bot or automation tool committed with unexpected credentials\n"
f"- A legitimate co-author contribution (review carefully)\n\n"
f"Please verify this is expected before merging."
),
}
self.logger.warning(
f"{self.log_prefix} Committer identity mismatch: "
f"PR author={parent_committer}, last committer={last_committer}"
)
await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output)
elif last_committer != parent_committer:
output = {
"title": "\u274c Security: Committer Identity Mismatch",
"summary": f"Last committer '{last_committer}' differs from PR author '{parent_committer}'",
"text": (
f"## Committer Identity Check\n\n"
f"**PR author:** `{parent_committer}`\n"
f"**Last commit committer:** `{last_committer}`\n\n"
f"The last commit in this PR was made by a different user than the PR author. "
f"This may indicate:\n"
f"- An unauthorized commit was pushed to the PR branch\n"
f"- A bot or automation tool committed with unexpected credentials\n"
f"- A legitimate co-author contribution (review carefully)\n\n"
f"Please verify this is expected before merging."
),
}
self.logger.warning(
f"{self.log_prefix} Committer identity mismatch: "
f"PR author={parent_committer}, last committer={last_committer}"
)
await self.check_run_handler.set_check_failure(name=SECURITY_COMMITTER_IDENTITY_STR, output=output)
else:
# Match — pass
output = {
"title": "Security: Committer Identity",
"summary": "Committer identity verified",
Expand Down
Loading