From b61e5ad0b7d4e25ae184c909cd9b84d4bfdd9720 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 19 Feb 2026 17:39:25 -0500 Subject: [PATCH 1/2] Parametrize GitHub issue tests by assertion rather than by issue. Each BabelTest assertion now gets its own pytest test entry, so individual assertion failures are reported separately instead of being collapsed into a single per-issue result. Achieved by: - Adding `repo_id` to `GitHubIssueTest` so `__str__` no longer triggers lazy PyGitHub API calls to resolve org/repo name. - Adding `repo_id` param to `get_test_issues_from_issue()` and threading it through to every `GitHubIssueTest` constructor. - Adding `GitHubIssuesTestCases.get_all_test_issues()` which returns a flat `list[pytest.param]`, one per assertion (issues with no BabelTest syntax are silently skipped). - Rewriting `test_github_issues.py` to parametrize over `GitHubIssueTest` objects via `get_all_test_issues()`, eliminating the inner loop that hid individual failures. The `--issue` filter is updated to use the stored `repo_id` field. Co-Authored-By: Claude Sonnet 4.6 --- .../github/github_issues_test_cases.py | 54 ++++++++++--- tests/github_issues/test_github_issues.py | 76 ++++++++++--------- 2 files changed, 83 insertions(+), 47 deletions(-) diff --git a/src/babel_validation/sources/github/github_issues_test_cases.py b/src/babel_validation/sources/github/github_issues_test_cases.py index f7c7b82..72457c4 100644 --- a/src/babel_validation/sources/github/github_issues_test_cases.py +++ b/src/babel_validation/sources/github/github_issues_test_cases.py @@ -15,7 +15,7 @@ class GitHubIssueTest: - def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None): + def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None, repo_id: str = ""): self.github_issue = github_issue self.assertion = assertion if param_sets is None: @@ -23,12 +23,13 @@ def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[l self.param_sets = param_sets if not isinstance(self.param_sets, list): raise ValueError(f"param_sets must be a list when creating a GitHubIssueTest({self.github_issue}, {self.assertion}, {self.param_sets})") + self.repo_id = repo_id self.logger = logging.getLogger(str(self)) self.logger.info(f"Creating GitHubIssueTest for {github_issue.html_url} {assertion}({param_sets})") def __str__(self): - return f"{self.github_issue.repository.organization.name}/{self.github_issue.repository.name}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})" + return f"{self.repo_id}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})" def test_with_nodenorm(self, nodenorm: CachedNodeNorm) -> Iterator[TestResult]: handler = ASSERTION_HANDLERS.get(self.assertion.lower()) @@ -83,7 +84,7 @@ def __init__(self, github_token: str, github_repositories=None): self.babeltest_pattern = re.compile(r'{{BabelTest\|.*?}}') self.babeltest_yaml_pattern = re.compile(r'```yaml\s+babel_tests:\s+.*?\s+```', re.DOTALL) - def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIssueTest]: + def get_test_issues_from_issue(self, github_issue: Issue.Issue, repo_id: str = "") -> list[GitHubIssueTest]: """ Extract test rows from a single GitHub issue. @@ -102,15 +103,11 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs src/babel_validation/assertions/README.md or inspect ASSERTION_HANDLERS.keys(). :param github_issue: A single GitHub issue to extract test cases from. + :param repo_id: The repository identifier string (e.g. "NCATSTranslator/Babel"). :return: A list of GitHubIssueTest objects found in the issue body. """ - github_issue_id = f"{github_issue.number}" - # Ideally, we would use: - # f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" - # But that is very slow. - # TODO: Wrap Issue.Issue so that we can store orgName and repoName locally so we don't need to call out - # to figure it out. + github_issue_id = f"{repo_id}#{github_issue.number}" if repo_id else f"{github_issue.number}" self.logger.debug(f"Looking for tests in issue {github_issue_id}: {github_issue.title} ({str(github_issue.state)}, {github_issue.html_url})") # Is there an issue body at all? @@ -135,7 +132,7 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs if len(params) < 2: raise ValueError(f"Too few parameters found in BabelTest in issue {github_issue_id}: {match}") else: - testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]])) + testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]], repo_id=repo_id)) babeltest_yaml_matches = re.findall(self.babeltest_yaml_pattern, github_issue.body) if babeltest_yaml_matches: @@ -158,7 +155,7 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs param_sets.append(param_set) else: raise RuntimeError(f"Unknown parameter set type {param_set} in issue {github_issue_id}") - testrows.append(GitHubIssueTest(github_issue, assertion, param_sets)) + testrows.append(GitHubIssueTest(github_issue, assertion, param_sets, repo_id=repo_id)) return testrows @@ -183,3 +180,38 @@ def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]: yield issue self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}") + + def get_all_test_issues(self, github_repositories=None) -> list: + """ + Get all BabelTest assertions across one or more repositories as a flat list of pytest ParameterSets. + + Each GitHubIssueTest (one assertion from one issue) becomes its own ParameterSet, so pytest + reports a separate pass/fail for every assertion rather than collapsing an entire issue into + a single test. + + Issues that contain no BabelTest assertions are silently skipped. + + :param github_repositories: A list of GitHub repositories to search for test cases. If none is + provided, we default to the list specified when creating this GitHubIssuesTestCases class. + :return: A list of pytest.param objects, one per GitHubIssueTest. + """ + import pytest as _pytest + + if github_repositories is None: + github_repositories = self.github_repositories + + result = [] + for repo_id in github_repositories: + self.logger.info(f"Looking up issues in GitHub repository {repo_id}") + repo = self.github.get_repo(repo_id, lazy=True) + + issue_count = 0 + for issue in tqdm(repo.get_issues(state='all', sort='updated'), desc=repo_id): + issue_count += 1 + tests = self.get_test_issues_from_issue(issue, repo_id=repo_id) + for test_issue in tests: + result.append(_pytest.param(test_issue, id=str(test_issue))) + + self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}") + + return result diff --git a/tests/github_issues/test_github_issues.py b/tests/github_issues/test_github_issues.py index 94252f2..1ab9765 100644 --- a/tests/github_issues/test_github_issues.py +++ b/tests/github_issues/test_github_issues.py @@ -3,17 +3,12 @@ import dotenv import pytest -from github import Issue -from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases +from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest, GitHubIssuesTestCases from src.babel_validation.services.nameres import CachedNameRes from src.babel_validation.services.nodenorm import CachedNodeNorm from src.babel_validation.core.testrow import TestResult, TestStatus -# Helper functions -def get_github_issue_id(github_issue: Issue.Issue): - return f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" - # Initialize the test. dotenv.load_dotenv() github_token = os.getenv('GITHUB_TOKEN') @@ -24,51 +19,60 @@ def get_github_issue_id(github_issue: Issue.Issue): 'TranslatorSRI/babel-validation', # https://github.com/TranslatorSRI/babel-validation ]) -@pytest.mark.parametrize("github_issue", github_issues_test_cases.get_all_issues()) -def test_github_issue(target_info, github_issue, selected_github_issues): - # If github_issues is provided, we can skip all others. +github_issue_tests = github_issues_test_cases.get_all_test_issues() + +@pytest.mark.parametrize("github_issue_test", github_issue_tests) +def test_github_issue(target_info, github_issue_test: GitHubIssueTest, selected_github_issues): + # If --issue is provided, skip assertions not belonging to matching issues. if selected_github_issues: - # Check all three possible ways in which this issue might be specified. github_issue_matched = False for selected_github_issue in selected_github_issues: if '/' in selected_github_issue: - github_issue_matched = (f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" == selected_github_issue) + # e.g. "NCATSTranslator/Babel#637" + github_issue_matched = ( + f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}" + == selected_github_issue + ) elif '#' in selected_github_issue: - github_issue_matched = (f"{github_issue.repository.name}#{github_issue.number}" == selected_github_issue) + # e.g. "Babel#637" + repo_name = github_issue_test.repo_id.split('/')[-1] if '/' in github_issue_test.repo_id else github_issue_test.repo_id + github_issue_matched = ( + f"{repo_name}#{github_issue_test.github_issue.number}" + == selected_github_issue + ) else: - github_issue_matched = int(selected_github_issue) == github_issue.number + # bare number, e.g. "637" + github_issue_matched = int(selected_github_issue) == github_issue_test.github_issue.number if github_issue_matched: break - if github_issue_matched: - # This issue is one of those that should be tested. - pass - else: - pytest.skip(f"GitHub Issue {str(github_issue)} not included in list of GitHub issues to be tested: {selected_github_issues}.") + if not github_issue_matched: + pytest.skip( + f"GitHub Issue {github_issue_test.repo_id}#{github_issue_test.github_issue.number} " + f"not included in list of GitHub issues to be tested: {selected_github_issues}." + ) return - # Test this issue with NodeNorm. nodenorm = CachedNodeNorm.from_url(target_info['NodeNormURL']) nameres = CachedNameRes.from_url(target_info['NameResURL']) - tests = github_issues_test_cases.get_test_issues_from_issue(github_issue) - if not tests: - pytest.skip(f"No tests found in issue {github_issue}") - return - for test_issue in tests: - results_nodenorm = test_issue.test_with_nodenorm(nodenorm) - results_nameres = test_issue.test_with_nameres(nodenorm, nameres) + results = itertools.chain( + github_issue_test.test_with_nodenorm(nodenorm), + github_issue_test.test_with_nameres(nodenorm, nameres), + ) + + issue_label = f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number} ({github_issue_test.github_issue.state})" - for result in itertools.chain(results_nodenorm, results_nameres): - match result: - case TestResult(status=TestStatus.Passed, message=message): - assert True, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}" + for result in results: + match result: + case TestResult(status=TestStatus.Passed, message=message): + assert True, f"{issue_label}: {message}" - case TestResult(status=TestStatus.Failed, message=message): - assert False, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}" + case TestResult(status=TestStatus.Failed, message=message): + assert False, f"{issue_label}: {message}" - case TestResult(status=TestStatus.Skipped, message=message): - pytest.skip(f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}") + case TestResult(status=TestStatus.Skipped, message=message): + pytest.skip(f"{issue_label}: {message}") - case _: - assert False, f"Unknown result from {get_github_issue_id(github_issue)}: {result}" + case _: + assert False, f"Unknown result from {issue_label}: {result}" From 79af8e29b801ddd1c0e4b2f4d3e283241619c2b2 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 19 Feb 2026 18:30:12 -0500 Subject: [PATCH 2/2] Speed up --issue flag by fetching specific issues via single-issue API endpoint. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves parametrization into a pytest_generate_tests hook in a new tests/github_issues/conftest.py. When --issue is provided, calls the new get_specific_test_issues() method which uses the single-issue REST endpoint (1–4 API calls) instead of paginating through all issues. Full runs are unaffected. Co-Authored-By: Claude Sonnet 4.6 --- .../github/github_issues_test_cases.py | 54 ++++++++++++++++++ tests/github_issues/conftest.py | 24 ++++++++ tests/github_issues/test_github_issues.py | 56 ++----------------- 3 files changed, 84 insertions(+), 50 deletions(-) create mode 100644 tests/github_issues/conftest.py diff --git a/src/babel_validation/sources/github/github_issues_test_cases.py b/src/babel_validation/sources/github/github_issues_test_cases.py index 72457c4..c8c94d4 100644 --- a/src/babel_validation/sources/github/github_issues_test_cases.py +++ b/src/babel_validation/sources/github/github_issues_test_cases.py @@ -159,6 +159,60 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue, repo_id: str = " return testrows + def get_specific_test_issues(self, issue_specs: list[str]) -> list: + """ + Fetch only the issues identified by issue_specs (values from --issue flag). + + Accepted formats (same as --issue CLI option): + - "637" bare number → try all configured repos + - "Babel#637" repo#number → match by repo name + - "NCATSTranslator/Babel#637" org/repo#number → exact match + + Uses the single-issue API endpoint (fast: 1–4 calls instead of paginating + through all issues). + """ + import pytest as _pytest + from github import GithubException + + result = [] + seen = set() # avoid duplicates if a spec matches the same issue twice + + for spec in issue_specs: + if '/' in spec and '#' in spec: + # "NCATSTranslator/Babel#637" + repo_id, issue_num_str = spec.rsplit('#', 1) + repos_to_check = [(repo_id, int(issue_num_str))] + elif '#' in spec: + # "Babel#637" + repo_name, issue_num_str = spec.split('#', 1) + repos_to_check = [ + (repo_id, int(issue_num_str)) + for repo_id in self.github_repositories + if repo_id.split('/')[-1] == repo_name + ] + else: + # bare number "637" + repos_to_check = [(repo_id, int(spec)) for repo_id in self.github_repositories] + + for repo_id, issue_num in repos_to_check: + key = (repo_id, issue_num) + if key in seen: + continue + seen.add(key) + try: + repo = self.github.get_repo(repo_id, lazy=True) + issue = repo.get_issue(issue_num) + tests = self.get_test_issues_from_issue(issue, repo_id=repo_id) + for test_issue in tests: + result.append(_pytest.param(test_issue, id=str(test_issue))) + except GithubException as e: + if e.status == 404: + pass # issue doesn't exist in this repo, that's fine + else: + raise + + return result + def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]: """ Get a list of test rows from one or more repositories. diff --git a/tests/github_issues/conftest.py b/tests/github_issues/conftest.py new file mode 100644 index 0000000..6464d1f --- /dev/null +++ b/tests/github_issues/conftest.py @@ -0,0 +1,24 @@ +import os + +import dotenv + +from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases + +dotenv.load_dotenv() +_github_issues_test_cases = GitHubIssuesTestCases(os.getenv('GITHUB_TOKEN'), [ + 'NCATSTranslator/Babel', + 'NCATSTranslator/NodeNormalization', + 'NCATSTranslator/NameResolution', + 'TranslatorSRI/babel-validation', +]) + + +def pytest_generate_tests(metafunc): + if "github_issue_test" not in metafunc.fixturenames: + return + selected_issues = metafunc.config.getoption('issue', default=[]) + if selected_issues: + params = _github_issues_test_cases.get_specific_test_issues(selected_issues) + else: + params = _github_issues_test_cases.get_all_test_issues() + metafunc.parametrize("github_issue_test", params) diff --git a/tests/github_issues/test_github_issues.py b/tests/github_issues/test_github_issues.py index 1ab9765..81d820f 100644 --- a/tests/github_issues/test_github_issues.py +++ b/tests/github_issues/test_github_issues.py @@ -1,58 +1,14 @@ import itertools -import os -import dotenv import pytest -from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest, GitHubIssuesTestCases +from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest from src.babel_validation.services.nameres import CachedNameRes from src.babel_validation.services.nodenorm import CachedNodeNorm from src.babel_validation.core.testrow import TestResult, TestStatus -# Initialize the test. -dotenv.load_dotenv() -github_token = os.getenv('GITHUB_TOKEN') -github_issues_test_cases = GitHubIssuesTestCases(github_token, [ - 'NCATSTranslator/Babel', # https://github.com/NCATSTranslator/Babel - 'NCATSTranslator/NodeNormalization', # https://github.com/NCATSTranslator/NodeNormalization - 'NCATSTranslator/NameResolution', # https://github.com/NCATSTranslator/NameResolution - 'TranslatorSRI/babel-validation', # https://github.com/TranslatorSRI/babel-validation -]) - -github_issue_tests = github_issues_test_cases.get_all_test_issues() - -@pytest.mark.parametrize("github_issue_test", github_issue_tests) -def test_github_issue(target_info, github_issue_test: GitHubIssueTest, selected_github_issues): - # If --issue is provided, skip assertions not belonging to matching issues. - if selected_github_issues: - github_issue_matched = False - for selected_github_issue in selected_github_issues: - if '/' in selected_github_issue: - # e.g. "NCATSTranslator/Babel#637" - github_issue_matched = ( - f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}" - == selected_github_issue - ) - elif '#' in selected_github_issue: - # e.g. "Babel#637" - repo_name = github_issue_test.repo_id.split('/')[-1] if '/' in github_issue_test.repo_id else github_issue_test.repo_id - github_issue_matched = ( - f"{repo_name}#{github_issue_test.github_issue.number}" - == selected_github_issue - ) - else: - # bare number, e.g. "637" - github_issue_matched = int(selected_github_issue) == github_issue_test.github_issue.number - if github_issue_matched: - break - - if not github_issue_matched: - pytest.skip( - f"GitHub Issue {github_issue_test.repo_id}#{github_issue_test.github_issue.number} " - f"not included in list of GitHub issues to be tested: {selected_github_issues}." - ) - return +def test_github_issue(target_info, github_issue_test: GitHubIssueTest): nodenorm = CachedNodeNorm.from_url(target_info['NodeNormURL']) nameres = CachedNameRes.from_url(target_info['NameResURL']) @@ -61,18 +17,18 @@ def test_github_issue(target_info, github_issue_test: GitHubIssueTest, selected_ github_issue_test.test_with_nameres(nodenorm, nameres), ) - issue_label = f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number} ({github_issue_test.github_issue.state})" + issue_label = ( + f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}" + f" ({github_issue_test.github_issue.state})" + ) for result in results: match result: case TestResult(status=TestStatus.Passed, message=message): assert True, f"{issue_label}: {message}" - case TestResult(status=TestStatus.Failed, message=message): assert False, f"{issue_label}: {message}" - case TestResult(status=TestStatus.Skipped, message=message): pytest.skip(f"{issue_label}: {message}") - case _: assert False, f"Unknown result from {issue_label}: {result}"