From bc2d410a853f5844d92b64c68ee3d7a745422fe0 Mon Sep 17 00:00:00 2001 From: Andrea Margiovanni Date: Fri, 28 Nov 2025 10:50:41 +0100 Subject: [PATCH 1/3] Add GitHub analyzer tool with export functionality - Add main analyzer script for GitHub repository analysis - Include exported data and repository list --- github_analyzer.py | 1032 +++++++++++++++++++++++++++++++++++++++ github_export/.DS_Store | Bin 0 -> 6148 bytes github_export/.gitkeep | 1 + repos.txt | 0 4 files changed, 1033 insertions(+) create mode 100644 github_analyzer.py create mode 100644 github_export/.DS_Store create mode 100644 github_export/.gitkeep create mode 100644 repos.txt diff --git a/github_analyzer.py b/github_analyzer.py new file mode 100644 index 0000000..a9c8bb5 --- /dev/null +++ b/github_analyzer.py @@ -0,0 +1,1032 @@ +#!/usr/bin/env python3 +""" +GitHub Repository Analyzer +========================== +Analizza repository GitHub estraendo commit, merge, PR e altri dati utili +per analisi di produttivita e qualita del codice. 
+ +Output: + - commits_export.csv: Tutti i commit di tutti i repository + - pull_requests_export.csv: Tutte le PR di tutti i repository + - contributors_summary.csv: Riepilogo per contributor + - repository_summary.csv: Riepilogo per repository + - quality_metrics.csv: Metriche di qualita per repository + - productivity_analysis.csv: Analisi produttivita per autore +""" + +import os +import csv +import json +import sys +from datetime import datetime, timedelta +from collections import defaultdict +from typing import Optional +import re + +# Prova a importare requests, altrimenti usa urllib +try: + import requests + HAS_REQUESTS = True +except ImportError: + import urllib.request + import urllib.error + HAS_REQUESTS = False + +# ============================================================================= +# CONFIGURAZIONE DEFAULT +# ============================================================================= + +DEFAULT_DAYS = 30 +DEFAULT_OUTPUT_DIR = "github_export" +DEFAULT_REPOS_FILE = "repos.txt" +PER_PAGE = 100 +VERBOSE = True + +# ============================================================================= +# COLORI TERMINALE +# ============================================================================= + +class Colors: + HEADER = '\033[95m' + BLUE = '\033[94m' + CYAN = '\033[96m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BOLD = '\033[1m' + DIM = '\033[2m' + RESET = '\033[0m' + +def print_banner(): + """Stampa il banner di benvenuto.""" + banner = f""" +{Colors.CYAN}{Colors.BOLD} + ╔═══════════════════════════════════════════════════════════════╗ + ║ ║ + ║ {Colors.GREEN}█▀▀ █ ▀█▀ █ █ █ █ █▄▄ ▄▀█ █▄ █ ▄▀█ █ █▄█ ▀█ █▀▀ █▀█ {Colors.CYAN} ║ + ║ {Colors.GREEN}█▄█ █ █ █▀█ █▄█ █▄█ █▀█ █ ▀█ █▀█ █▄▄ █ █▄ ██▄ █▀▄ {Colors.CYAN} ║ + ║ ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ ║ + ║ {Colors.RESET}Analizza repository GitHub ed esporta dati in CSV {Colors.CYAN} ║ + ║ {Colors.RESET}per analisi di produttivita e qualita 
del codice. {Colors.CYAN} ║ + ║ ║ + ╚═══════════════════════════════════════════════════════════════╝ +{Colors.RESET}""" + print(banner) + +def print_features(): + """Stampa le funzionalita del tool.""" + print(f""" +{Colors.BOLD}📊 COSA FA QUESTO TOOL:{Colors.RESET} + + {Colors.GREEN}✓{Colors.RESET} Analizza {Colors.BOLD}commit{Colors.RESET} (autore, data, linee aggiunte/rimosse, file modificati) + {Colors.GREEN}✓{Colors.RESET} Analizza {Colors.BOLD}pull request{Colors.RESET} (stato, reviewer, tempo di merge, approvazioni) + {Colors.GREEN}✓{Colors.RESET} Analizza {Colors.BOLD}issues{Colors.RESET} (bug, enhancement, tempo di chiusura) + {Colors.GREEN}✓{Colors.RESET} Calcola {Colors.BOLD}metriche di qualita{Colors.RESET} (revert ratio, review coverage, commit quality) + {Colors.GREEN}✓{Colors.RESET} Genera {Colors.BOLD}analisi produttivita{Colors.RESET} per ogni contributor + {Colors.GREEN}✓{Colors.RESET} Esporta tutto in {Colors.BOLD}file CSV{Colors.RESET} pronti per l'analisi + +{Colors.BOLD}📁 FILE GENERATI:{Colors.RESET} + + • commits_export.csv - Tutti i commit con dettagli + • pull_requests_export.csv - Tutte le PR con metriche + • issues_export.csv - Tutte le issues + • repository_summary.csv - Statistiche per repository + • quality_metrics.csv - Metriche di qualita + • productivity_analysis.csv - Analisi produttivita per autore + • contributors_summary.csv - Riepilogo contributors +""") + +def print_separator(): + print(f"{Colors.DIM}{'─' * 65}{Colors.RESET}") + +def prompt_input(message: str, default: str = None) -> str: + """Richiede input all'utente con supporto per valore default.""" + if default: + prompt = f"{Colors.CYAN}▶{Colors.RESET} {message} [{Colors.DIM}{default}{Colors.RESET}]: " + else: + prompt = f"{Colors.CYAN}▶{Colors.RESET} {message}: " + + try: + value = input(prompt).strip() + return value if value else default + except (KeyboardInterrupt, EOFError): + print(f"\n{Colors.YELLOW}Operazione annullata.{Colors.RESET}") + sys.exit(0) + +def 
prompt_confirm(message: str, default: bool = True) -> bool: + """Richiede conferma si/no.""" + default_str = "S/n" if default else "s/N" + prompt = f"{Colors.CYAN}▶{Colors.RESET} {message} [{default_str}]: " + + try: + value = input(prompt).strip().lower() + if not value: + return default + return value in ('s', 'si', 'y', 'yes') + except (KeyboardInterrupt, EOFError): + print(f"\n{Colors.YELLOW}Operazione annullata.{Colors.RESET}") + sys.exit(0) + + +class GitHubAnalyzer: + """Analizzatore di repository GitHub.""" + + def __init__(self, token: str, output_dir: str, days: int, verbose: bool = True): + self.token = token + self.output_dir = output_dir + self.verbose = verbose + self.days = days + self.base_url = "https://api.github.com" + self.headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + "User-Agent": "GitHub-Analyzer-Script" + } + self.since_date = datetime.now() - timedelta(days=days) + self.request_count = 0 + self.start_time = None + + # Storage per dati aggregati + self.all_commits = [] + self.all_prs = [] + self.all_issues = [] + self.all_reviews = [] + self.contributor_stats = defaultdict(lambda: { + "commits": 0, + "additions": 0, + "deletions": 0, + "prs_opened": 0, + "prs_merged": 0, + "prs_reviewed": 0, + "issues_opened": 0, + "issues_closed": 0, + "comments": 0, + "repositories": set(), + "first_activity": None, + "last_activity": None, + "commit_days": set(), + "avg_commit_size": [], + }) + self.repo_stats = {} + + # Crea directory output + os.makedirs(output_dir, exist_ok=True) + + def _log(self, message: str, level: str = "info", force: bool = False): + """Log con supporto verbose.""" + if self.verbose or force or level == "error": + timestamp = datetime.now().strftime("%H:%M:%S") + + colors = { + "info": Colors.BLUE, + "debug": Colors.DIM, + "warn": Colors.YELLOW, + "error": Colors.RED, + "success": Colors.GREEN, + "api": Colors.CYAN + } + + prefixes = { + "info": "INFO", + "debug": "DEBUG", + 
"warn": "WARN", + "error": "ERROR", + "success": "OK", + "api": "API" + } + + color = colors.get(level, "") + prefix = prefixes.get(level, "INFO") + + print(f"{color}[{timestamp}] [{prefix}] {message}{Colors.RESET}") + sys.stdout.flush() + + def _make_request(self, url: str, params: dict = None) -> Optional[dict]: + """Effettua una richiesta HTTP all'API GitHub.""" + if params: + param_str = "&".join(f"{k}={v}" for k, v in params.items()) + full_url = f"{url}?{param_str}" + else: + full_url = url + + self.request_count += 1 + + # Log della richiesta + short_url = url.replace(self.base_url, "").split("?")[0] + self._log(f"Request #{self.request_count}: GET {short_url}", "api") + + try: + if HAS_REQUESTS: + response = requests.get(full_url, headers=self.headers, timeout=30) + + # Log rate limit info + remaining = response.headers.get("X-RateLimit-Remaining", "?") + limit = response.headers.get("X-RateLimit-Limit", "?") + self._log(f" -> Status: {response.status_code} | Rate limit: {remaining}/{limit}", "debug") + + if response.status_code == 200: + return response.json() + elif response.status_code == 403: + reset_time = response.headers.get("X-RateLimit-Reset", "") + if reset_time: + reset_dt = datetime.fromtimestamp(int(reset_time)) + self._log(f"Rate limit raggiunto! 
def parse_repo_url(self, repo: str) -> tuple:
    """Extract ``(owner, repo_name)`` from a GitHub URL or ``owner/repo`` string.

    Accepted forms: ``owner/repo``, ``http(s)://github.com/owner/repo``
    (optionally with ``www.``, a trailing slash, a ``.git`` suffix or extra
    path segments such as ``/tree/main``) and the SSH remote
    ``git@github.com:owner/repo.git``.

    Args:
        repo: Repository reference as written by the user.

    Returns:
        ``(owner, repo_name)``, or ``(None, None)`` when the two components
        cannot be determined.
    """
    spec = (repo or "").strip()

    # Strip a known host prefix only at the start of the string, so that an
    # owner or repository name merely containing such text is left intact.
    known_prefixes = (
        "https://github.com/",
        "http://github.com/",
        "https://www.github.com/",
        "http://www.github.com/",
        "ssh://git@github.com/",
        "git@github.com:",
        "github.com/",
    )
    lowered = spec.lower()
    for prefix in known_prefixes:
        if lowered.startswith(prefix):
            spec = spec[len(prefix):]
            break

    parts = spec.strip("/").split("/")
    if len(parts) < 2:
        return None, None

    owner, name = parts[0], parts[1]
    # Drop ".git" only as a suffix: a blanket replace would also mangle
    # names such as "octocat.github.io".
    if name.endswith(".git"):
        name = name[:-len(".git")]

    if not owner or not name:
        return None, None
    return owner, name
self.since_date.isoformat()} + + commits = self._paginate(url, params) + processed = [] + total = len(commits) + + for idx, commit in enumerate(commits, 1): + if not commit: + continue + + sha = commit.get("sha", "") + self._log(f" Commit {idx}/{total}: {sha[:7]} - Recupero dettagli...", "debug") + detail_url = f"{self.base_url}/repos/{owner}/{repo}/commits/{sha}" + detail = self._make_request(detail_url) + + stats = detail.get("stats", {}) if detail else {} + files = detail.get("files", []) if detail else [] + + commit_data = commit.get("commit", {}) + author_data = commit_data.get("author", {}) + committer_data = commit_data.get("committer", {}) + + author_login = "" + if commit.get("author"): + author_login = commit["author"].get("login", "") + + committer_login = "" + if commit.get("committer"): + committer_login = commit["committer"].get("login", "") + + message = commit_data.get("message", "") + is_merge = message.lower().startswith("merge") + is_revert = message.lower().startswith("revert") + + file_types = defaultdict(int) + for f in files: + filename = f.get("filename", "") + ext = os.path.splitext(filename)[1].lower() + file_types[ext] += 1 + + processed_commit = { + "repository": f"{owner}/{repo}", + "sha": sha, + "short_sha": sha[:7] if sha else "", + "author_name": author_data.get("name", ""), + "author_email": author_data.get("email", ""), + "author_login": author_login, + "committer_name": committer_data.get("name", ""), + "committer_email": committer_data.get("email", ""), + "committer_login": committer_login, + "date": author_data.get("date", ""), + "message": message.split("\n")[0][:200], + "full_message": message[:500], + "additions": stats.get("additions", 0), + "deletions": stats.get("deletions", 0), + "total_changes": stats.get("total", 0), + "files_changed": len(files), + "is_merge_commit": is_merge, + "is_revert": is_revert, + "file_types": json.dumps(dict(file_types)), + "url": commit.get("html_url", ""), + } + + 
def fetch_pull_requests(self, owner: str, repo: str) -> list:
    """Fetch the pull requests created inside the analysis window.

    Lists every pull request of ``owner/repo`` (all states, most recently
    updated first), skips those created before ``self.since_date`` and, for
    each remaining one, issues two more requests: one for its reviews and one
    for its review comments. The PR author and every reviewer other than the
    author are reported to ``self._update_contributor_stats``.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.

    Returns:
        One flat dict per retained pull request, in the order the API
        returned them.
    """
    def parse_ts(value: str) -> datetime:
        # Rewrite the trailing "Z" as an explicit UTC offset before parsing.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    repo_full_name = f"{owner}/{repo}"
    self._log(f"Recupero pull requests per {owner}/{repo}...", "info")
    url = f"{self.base_url}/repos/{owner}/{repo}/pulls"
    params = {"state": "all", "sort": "updated", "direction": "desc"}

    processed = []
    for pr in self._paginate(url, params):
        if not pr:
            continue

        created_at = pr.get("created_at", "")
        created = parse_ts(created_at) if created_at else None
        # since_date is a naive datetime, so the offset is dropped before
        # comparing.
        # NOTE(review): since_date comes from local datetime.now() while these
        # timestamps are UTC, so the window edge can be off by the local UTC
        # offset - confirm whether that matters.
        if created is not None and created.replace(tzinfo=None) < self.since_date:
            continue

        pr_number = pr.get("number")
        self._log(
            f" PR {len(processed) + 1} (#{pr_number}): Recupero reviews e commenti...",
            "debug",
        )

        pr_url = f"{url}/{pr_number}"
        reviews = self._make_request(f"{pr_url}/reviews") or []
        comments = self._make_request(f"{pr_url}/comments") or []

        merged_at = pr.get("merged_at")
        time_to_merge = None
        if merged_at and created is not None:
            time_to_merge = (parse_ts(merged_at) - created).total_seconds() / 3600

        # "x or {}" rather than get(..., {}): the key can be present with a
        # null value, in which case the default is not applied.
        user = pr.get("user") or {}
        merged_by = pr.get("merged_by") or {}
        author = user.get("login", "")
        labels = [label.get("name", "") for label in pr.get("labels") or []]
        review_states = [review.get("state") for review in reviews]
        reviewer_logins = {
            review["user"].get("login", "") for review in reviews if review.get("user")
        }

        processed_pr = {
            "repository": repo_full_name,
            "number": pr_number,
            "title": (pr.get("title") or "")[:200],
            "state": pr.get("state", ""),
            "author_login": author,
            "author_type": user.get("type", ""),
            "created_at": created_at,
            "updated_at": pr.get("updated_at", ""),
            "closed_at": pr.get("closed_at", ""),
            "merged_at": merged_at,
            "merged_by": merged_by.get("login", ""),
            # NOTE(review): the list endpoint appears to omit "merged" (and
            # the size/count fields below), so merged_at is used as the
            # signal - confirm against the REST reference.
            "is_merged": bool(pr.get("merged") or merged_at),
            "draft": pr.get("draft", False),
            "additions": pr.get("additions", 0),
            "deletions": pr.get("deletions", 0),
            "changed_files": pr.get("changed_files", 0),
            "commits": pr.get("commits", 0),
            "comments": pr.get("comments", 0),
            # Fall back to the review comments fetched above (one page only)
            # when the PR payload carries no count.
            "review_comments": pr.get("review_comments", len(comments)),
            # "is not None" so that a zero merge time is kept instead of
            # being reported as missing.
            "time_to_merge_hours": round(time_to_merge, 2) if time_to_merge is not None else None,
            "labels": ",".join(labels),
            "reviewers_count": len(reviewer_logins),
            "approvals": review_states.count("APPROVED"),
            "changes_requested": review_states.count("CHANGES_REQUESTED"),
            "base_branch": (pr.get("base") or {}).get("ref", ""),
            "head_branch": (pr.get("head") or {}).get("ref", ""),
            "url": pr.get("html_url", ""),
        }
        processed.append(processed_pr)

        if author:
            self._update_contributor_stats(author, processed_pr, "pr")

        for review in reviews:
            reviewer = (review.get("user") or {}).get("login", "")
            if reviewer and reviewer != author:
                # Review payloads carry no "repository" key; add it so the
                # reviewer is credited with this repository.
                review_record = {**review, "repository": repo_full_name}
                self._update_contributor_stats(reviewer, review_record, "review")

    self._log(f"Trovate {len(processed)} pull requests per {owner}/{repo}", "success")
    return processed
def _update_contributor_stats(self, login: str, data: dict, data_type: str):
    """Fold one activity record into the running totals for ``login``.

    Args:
        login: Contributor login; used as the key into
            ``self.contributor_stats``.
        data: Activity record. Its timestamp is taken from the first
            non-empty of ``date``, ``created_at`` and ``submitted_at``;
            ``repository`` is recorded when present and non-empty.
        data_type: One of ``"commit"``, ``"pr"``, ``"review"`` or
            ``"issue"``; any other value updates only the repository and
            activity-range fields.
    """
    stats = self.contributor_stats[login]

    # Some records (e.g. raw review payloads) carry no repository name;
    # skip those rather than storing an empty string as a repository.
    repository = data.get("repository")
    if repository:
        stats["repositories"].add(repository)

    # Parse the timestamp once; an unparseable or non-string value simply
    # leaves the activity range and commit days untouched.
    date_str = data.get("date") or data.get("created_at") or data.get("submitted_at")
    moment = None
    if date_str:
        try:
            moment = datetime.fromisoformat(date_str.replace("Z", "+00:00")).replace(tzinfo=None)
        except (ValueError, TypeError, AttributeError):
            moment = None

    if moment is not None:
        if stats["first_activity"] is None or moment < stats["first_activity"]:
            stats["first_activity"] = moment
        if stats["last_activity"] is None or moment > stats["last_activity"]:
            stats["last_activity"] = moment

    if data_type == "commit":
        stats["commits"] += 1
        stats["additions"] += data.get("additions", 0)
        stats["deletions"] += data.get("deletions", 0)
        stats["avg_commit_size"].append(data.get("total_changes", 0))
        if moment is not None:
            stats["commit_days"].add(moment.strftime("%Y-%m-%d"))

    elif data_type == "pr":
        stats["prs_opened"] += 1
        if data.get("is_merged"):
            stats["prs_merged"] += 1

    elif data_type == "review":
        stats["prs_reviewed"] += 1

    elif data_type == "issue":
        stats["issues_opened"] += 1
        if data.get("state") == "closed":
            stats["issues_closed"] += 1
def calculate_quality_metrics(self, owner: str, repo: str, commits: list, prs: list) -> dict:
    """Compute code-quality indicators for one repository.

    Args:
        owner: Repository owner.
        repo: Repository name.
        commits: Processed commit dicts (reads ``is_revert``,
            ``total_changes`` and ``message``).
        prs: Processed pull-request dicts (reads ``reviewers_count``,
            ``approvals``, ``changes_requested`` and ``draft``).

    Returns:
        Dict of percentages and counts plus a weighted ``quality_score``.
        Every ratio is 0 when its denominator is 0.
    """
    def percentage(part, whole):
        # Share of `part` in `whole` as a percentage; 0 when there is nothing to divide by.
        return part / whole * 100 if whole > 0 else 0

    def prs_with_positive(field):
        # Number of PRs whose numeric `field` is strictly positive.
        return sum(1 for pr in prs if pr.get(field, 0) > 0)

    n_commits = len(commits)
    n_prs = len(prs)

    # Commit-level indicators.
    revert_ratio = percentage(sum(1 for c in commits if c.get("is_revert")), n_commits)

    sizes = [c.get("total_changes", 0) for c in commits]
    mean_size = sum(sizes) / len(sizes) if sizes else 0
    oversized = sum(1 for size in sizes if size > 500)

    # A message counts as "good" when it is longer than 10 characters and
    # either starts with an uppercase letter or with a conventional prefix.
    well_formed = 0
    for c in commits:
        text = c.get("message", "")
        if len(text) <= 10:
            continue
        if text[0].isupper() or re.match(r'^(feat|fix|docs|style|refactor|test|chore)', text.lower()):
            well_formed += 1
    message_quality = percentage(well_formed, n_commits)

    # Pull-request-level indicators.
    review_coverage = percentage(prs_with_positive("reviewers_count"), n_prs)
    approval_rate = percentage(prs_with_positive("approvals"), n_prs)
    changes_requested_ratio = percentage(prs_with_positive("changes_requested"), n_prs)
    draft_ratio = percentage(sum(1 for pr in prs if pr.get("draft")), n_prs)

    score = (
        (100 - revert_ratio) * 0.2 +
        review_coverage * 0.25 +
        approval_rate * 0.2 +
        (100 - changes_requested_ratio) * 0.15 +
        message_quality * 0.2
    )

    return {
        "repository": f"{owner}/{repo}",
        "revert_ratio_pct": round(revert_ratio, 2),
        "avg_commit_size_lines": round(mean_size, 2),
        "large_commits_count": oversized,
        "large_commits_ratio_pct": round(oversized / n_commits * 100, 2) if n_commits > 0 else 0,
        "pr_review_coverage_pct": round(review_coverage, 2),
        "pr_approval_rate_pct": round(approval_rate, 2),
        "pr_changes_requested_ratio_pct": round(changes_requested_ratio, 2),
        "draft_pr_ratio_pct": round(draft_ratio, 2),
        "commit_message_quality_pct": round(message_quality, 2),
        "quality_score": round(score, 2),
    }
def generate_productivity_analysis(self) -> list:
    """Build one productivity row per contributor from the aggregated stats.

    Reads ``self.contributor_stats`` and skips entries with an empty login.

    Returns:
        List of row dicts sorted by ``productivity_score``, highest first.
        The score is a sum of capped components: commits (max 30), merged
        PRs (max 25), reviews (max 20), consistency (max 15) and number of
        repositories (max 10).
    """
    productivity = []

    for login, stats in self.contributor_stats.items():
        if not login:
            continue

        total_commits = stats["commits"]
        active_days = len(stats["commit_days"])
        commits_per_day = total_commits / active_days if active_days > 0 else 0

        sizes = stats["avg_commit_size"]
        avg_commit_size = sum(sizes) / len(sizes) if sizes else 0

        prs_opened = stats["prs_opened"]
        pr_merge_rate = stats["prs_merged"] / prs_opened * 100 if prs_opened > 0 else 0

        # Measure the span in calendar days, the same unit as active_days.
        # Subtracting full datetimes would count 23:00 -> 01:00 the next
        # day as a 1-day span holding 2 active days (200% consistency).
        first, last = stats["first_activity"], stats["last_activity"]
        activity_span_days = 0
        if first and last:
            activity_span_days = (last.date() - first.date()).days + 1

        consistency = active_days / activity_span_days * 100 if activity_span_days > 0 else 0

        # Sort for reproducible output (set order varies between runs) and
        # ignore empty names.
        repositories = sorted(name for name in stats["repositories"] if name)

        productivity.append({
            "contributor": login,
            "repositories": ",".join(repositories),
            "repositories_count": len(repositories),
            "total_commits": total_commits,
            "total_additions": stats["additions"],
            "total_deletions": stats["deletions"],
            "net_lines": stats["additions"] - stats["deletions"],
            "avg_commit_size": round(avg_commit_size, 2),
            "prs_opened": prs_opened,
            "prs_merged": stats["prs_merged"],
            "pr_merge_rate_pct": round(pr_merge_rate, 2),
            "prs_reviewed": stats["prs_reviewed"],
            "issues_opened": stats["issues_opened"],
            "issues_closed": stats["issues_closed"],
            "active_days": active_days,
            "commits_per_active_day": round(commits_per_day, 2),
            "first_activity": first.isoformat() if first else "",
            "last_activity": last.isoformat() if last else "",
            "activity_span_days": activity_span_days,
            "consistency_pct": round(consistency, 2),
            "productivity_score": round(
                min(total_commits / 10, 30) +
                min(stats["prs_merged"] * 5, 25) +
                min(stats["prs_reviewed"] * 3, 20) +
                min(consistency / 5, 15) +
                min(len(repositories) * 2, 10),
                2
            ),
        })

    return sorted(productivity, key=lambda row: -row["productivity_score"])
writer.writerows(self.all_issues) + print(f" {Colors.GREEN}✓{Colors.RESET} issues_export.csv ({len(self.all_issues)} righe)") + + if self.repo_stats: + summaries = [s["summary"] for s in self.repo_stats.values()] + filepath = os.path.join(self.output_dir, "repository_summary.csv") + with open(filepath, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=summaries[0].keys()) + writer.writeheader() + writer.writerows(summaries) + print(f" {Colors.GREEN}✓{Colors.RESET} repository_summary.csv ({len(summaries)} righe)") + + if self.repo_stats: + quality = [s["quality"] for s in self.repo_stats.values()] + filepath = os.path.join(self.output_dir, "quality_metrics.csv") + with open(filepath, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=quality[0].keys()) + writer.writeheader() + writer.writerows(quality) + print(f" {Colors.GREEN}✓{Colors.RESET} quality_metrics.csv ({len(quality)} righe)") + + productivity = self.generate_productivity_analysis() + if productivity: + filepath = os.path.join(self.output_dir, "productivity_analysis.csv") + with open(filepath, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=productivity[0].keys()) + writer.writeheader() + writer.writerows(productivity) + print(f" {Colors.GREEN}✓{Colors.RESET} productivity_analysis.csv ({len(productivity)} righe)") + + if self.contributor_stats: + contributors = [] + for login, stats in self.contributor_stats.items(): + if not login: + continue + contributors.append({ + "login": login, + "commits": stats["commits"], + "additions": stats["additions"], + "deletions": stats["deletions"], + "prs_opened": stats["prs_opened"], + "prs_merged": stats["prs_merged"], + "prs_reviewed": stats["prs_reviewed"], + "issues_opened": stats["issues_opened"], + "repositories_count": len(stats["repositories"]), + "repositories": ",".join(stats["repositories"]), + }) + + if contributors: + filepath = os.path.join(self.output_dir, 
"contributors_summary.csv") + with open(filepath, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=contributors[0].keys()) + writer.writeheader() + writer.writerows(sorted(contributors, key=lambda x: -x["commits"])) + print(f" {Colors.GREEN}✓{Colors.RESET} contributors_summary.csv ({len(contributors)} righe)") + + def run(self, repositories: list): + """Esegue l'analisi completa su tutti i repository.""" + if not repositories: + self._log("Nessun repository specificato!", "error", force=True) + return + + self.start_time = datetime.now() + total_repos = len(repositories) + + print(f"\n{'=' * 65}") + print(f"{Colors.BOLD} 🚀 AVVIO ANALISI{Colors.RESET}") + print(f"{'=' * 65}") + print(f" Repository da analizzare: {Colors.BOLD}{total_repos}{Colors.RESET}") + print(f" Periodo analisi: ultimi {Colors.BOLD}{self.days}{Colors.RESET} giorni") + print(f" Data inizio periodo: {Colors.BOLD}{self.since_date.strftime('%Y-%m-%d')}{Colors.RESET}") + print(f" Output directory: {Colors.BOLD}{os.path.abspath(self.output_dir)}{Colors.RESET}") + + for idx, repo in enumerate(repositories, 1): + try: + self.analyze_repository(repo, idx, total_repos) + except Exception as e: + self._log(f"Errore analisi {repo}: {e}", "error", force=True) + if self.verbose: + import traceback + traceback.print_exc() + + self.export_to_csv() + + total_time = (datetime.now() - self.start_time).total_seconds() + + print(f"\n{'=' * 65}") + print(f"{Colors.GREEN}{Colors.BOLD} ✅ ANALISI COMPLETATA!{Colors.RESET}") + print(f"{'=' * 65}") + print(f" ⏱️ Tempo totale: {Colors.BOLD}{total_time:.1f}{Colors.RESET} secondi") + print(f" 🌐 Richieste API: {Colors.BOLD}{self.request_count}{Colors.RESET}") + print(f" 📝 Commit analizzati: {Colors.BOLD}{len(self.all_commits)}{Colors.RESET}") + print(f" 🔀 Pull request analizzate: {Colors.BOLD}{len(self.all_prs)}{Colors.RESET}") + print(f" 🎫 Issues analizzate: {Colors.BOLD}{len(self.all_issues)}{Colors.RESET}") + print(f" 👥 Contributors trovati: 
def load_repos_from_file(filepath: str) -> list:
    """Load the list of repositories to analyze from a text file.

    Every line is stripped of surrounding whitespace; blank lines and lines
    starting with ``#`` are ignored. The remaining lines are returned
    verbatim, in file order.

    Args:
        filepath: Path of the repository list file.

    Returns:
        The list of entries; empty when the file does not exist.
    """
    if not os.path.exists(filepath):
        return []

    # utf-8-sig decodes plain UTF-8 identically on every platform and drops a
    # leading BOM, which would otherwise hide a "#" or corrupt the first entry.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        stripped = (line.strip() for line in f)
        return [entry for entry in stripped if entry and not entry.startswith("#")]


def create_sample_repos_file(filepath: str):
    """Write a commented sample repository list to ``filepath``.

    The file only contains ``#`` comment lines describing the supported
    formats, so loading it yields no repositories. An existing file at
    ``filepath`` is overwritten.

    Args:
        filepath: Path of the file to create.
    """
    sample_content = """# GitHub Repository Analyzer - Lista Repository
# Inserisci un repository per riga
# Formati supportati:
# owner/repo
# https://github.com/owner/repo
#
# Esempio:
# facebook/react
# microsoft/vscode
# https://github.com/torvalds/linux

"""
    # Explicit encoding, consistent with the CSV writers in this file.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(sample_content)


def validate_token(token: str) -> bool:
    """Check whether a GitHub token is accepted by the API.

    Tokens that are empty or shorter than 10 characters are rejected without
    any network call. Otherwise ``https://api.github.com/user`` is requested
    with the token (10 second timeout), through ``requests`` when available
    and ``urllib`` otherwise.

    Args:
        token: The personal access token to check.

    Returns:
        True only when the API answers with status 200; False for any other
        status or when the request fails.
    """
    if not token or len(token) < 10:
        return False

    try:
        headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "GitHub-Analyzer-Script"
        }

        if HAS_REQUESTS:
            response = requests.get("https://api.github.com/user", headers=headers, timeout=10)
            return response.status_code == 200
        else:
            req = urllib.request.Request("https://api.github.com/user", headers=headers)
            with urllib.request.urlopen(req, timeout=10) as response:
                return response.status == 200
    except Exception:
        # Best effort: any network/HTTP failure means "not validated".
        # Not a bare ``except`` so Ctrl+C (KeyboardInterrupt) and SystemExit
        # still propagate instead of being reported as an invalid token.
        return False
def main():
    """Interactive entry point: collect the settings, then run the analysis.

    Steps, in order: show the banner and the feature list; ask for a GitHub
    token and validate it (an invalid token can still be used after an
    explicit confirmation); make sure the repository list file exists,
    creating a sample one when it does not; load the repositories; ask how
    many days to analyze; print a recap and ask for confirmation; finally
    build a ``GitHubAnalyzer`` and run it.

    Exits with status 1 when no token is given or an invalid token is not
    confirmed, and with status 0 when the repository list is empty or the
    user cancels at the final confirmation.
    """
    c = Colors

    # Banner and introduction.
    print_banner()
    print_features()

    print_separator()
    print(f"{c.BOLD}⚙️ CONFIGURAZIONE{c.RESET}\n")

    # 1. Ask for the GitHub token.
    print(f" Per usare questo tool hai bisogno di un {c.BOLD}GitHub Personal Access Token{c.RESET}.")
    print(f" Crealo su: {c.CYAN}https://github.com/settings/tokens{c.RESET}")
    print(f" Permessi necessari: {c.DIM}repo (full control){c.RESET}\n")

    token = prompt_input("Inserisci il tuo GitHub Token")
    if not token:
        print(f"\n{c.RED}❌ Token non fornito. Impossibile continuare.{c.RESET}")
        sys.exit(1)

    # Validate the token; flush so the progress text shows before the request.
    print(f"\n{c.DIM} Verifica token in corso...{c.RESET}", end=" ")
    sys.stdout.flush()

    token_ok = validate_token(token)
    if token_ok:
        print(f"{c.GREEN}✓ Token valido!{c.RESET}")
    else:
        print(f"{c.RED}✗ Token non valido o senza permessi sufficienti.{c.RESET}")
        keep_going = prompt_confirm("Vuoi continuare comunque?", default=False)
        if not keep_going:
            sys.exit(1)

    # 2. Check for / create the repository list file.
    print()
    repos_file = DEFAULT_REPOS_FILE

    if not os.path.exists(repos_file):
        print(f" {c.YELLOW}⚠{c.RESET} File {c.BOLD}{repos_file}{c.RESET} non trovato.")
        create_sample_repos_file(repos_file)
        print(f" {c.GREEN}✓{c.RESET} Creato file di esempio: {c.BOLD}{repos_file}{c.RESET}")

    repos = load_repos_from_file(repos_file)

    if not repos:
        print(f"\n {c.YELLOW}⚠{c.RESET} Nessun repository trovato in {c.BOLD}{repos_file}{c.RESET}")
        print(" Aggiungi i repository da analizzare (uno per riga) e rilancia lo script.")
        print(f"\n Esempio contenuto {repos_file}:")
        print(f" {c.DIM}owner/repo1")
        print(" owner/repo2")
        print(f" https://github.com/org/project{c.RESET}")
        sys.exit(0)

    print(f"\n {c.GREEN}✓{c.RESET} Trovati {c.BOLD}{len(repos)}{c.RESET} repository in {repos_file}:")
    for repo_name in repos[:5]:
        print(f" {c.DIM}• {repo_name}{c.RESET}")
    hidden = len(repos) - 5
    if hidden > 0:
        print(f" {c.DIM}... e altri {hidden}{c.RESET}")

    # 3. Ask for the analysis period; anything invalid falls back to the default.
    print()
    days_str = prompt_input("Quanti giorni vuoi analizzare?", str(DEFAULT_DAYS))

    try:
        days = int(days_str)
    except ValueError:
        days = DEFAULT_DAYS
    else:
        if days < 1:
            days = DEFAULT_DAYS

    # 4. Output directory.
    output_dir = DEFAULT_OUTPUT_DIR

    # 5. Recap and confirmation.
    print()
    print_separator()
    print(f"\n{c.BOLD}📋 RIEPILOGO CONFIGURAZIONE:{c.RESET}")
    print(f" • Repository: {c.BOLD}{len(repos)}{c.RESET}")
    print(f" • Periodo: ultimi {c.BOLD}{days}{c.RESET} giorni")
    print(f" • Output: {c.BOLD}{output_dir}/{c.RESET}")
    print()

    confirmed = prompt_confirm("Avviare l'analisi?", default=True)
    if not confirmed:
        print(f"\n{c.YELLOW}Analisi annullata.{c.RESET}")
        sys.exit(0)

    # Run the analysis.
    analyzer = GitHubAnalyzer(token, output_dir, days, verbose=VERBOSE)
    analyzer.run(repos)

    print(f"\n{c.GREEN}Grazie per aver usato GitHub Analyzer!{c.RESET}\n")
Chiedi periodo di analisi + print() + days_str = prompt_input(f"Quanti giorni vuoi analizzare?", str(DEFAULT_DAYS)) + + try: + days = int(days_str) + if days < 1: + days = DEFAULT_DAYS + except ValueError: + days = DEFAULT_DAYS + + # 4. Directory output + output_dir = DEFAULT_OUTPUT_DIR + + # 5. Conferma e avvio + print() + print_separator() + print(f"\n{Colors.BOLD}📋 RIEPILOGO CONFIGURAZIONE:{Colors.RESET}") + print(f" • Repository: {Colors.BOLD}{len(repos)}{Colors.RESET}") + print(f" • Periodo: ultimi {Colors.BOLD}{days}{Colors.RESET} giorni") + print(f" • Output: {Colors.BOLD}{output_dir}/{Colors.RESET}") + print() + + if not prompt_confirm("Avviare l'analisi?", default=True): + print(f"\n{Colors.YELLOW}Analisi annullata.{Colors.RESET}") + sys.exit(0) + + # Avvia analisi + analyzer = GitHubAnalyzer(token, output_dir, days, verbose=VERBOSE) + analyzer.run(repos) + + print(f"\n{Colors.GREEN}Grazie per aver usato GitHub Analyzer!{Colors.RESET}\n") + + +if __name__ == "__main__": + main() diff --git a/github_export/.DS_Store b/github_export/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Fri, 28 Nov 2025 10:51:04 +0100 Subject: [PATCH 2/3] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store From 1b80de49a258e3b03c30609c789904e100dc4fa8 Mon Sep 17 00:00:00 2001 From: Andrea Margiovanni Date: Fri, 28 Nov 2025 10:57:05 +0100 Subject: [PATCH 3/3] constitution --- .claude/commands/speckit.analyze.md | 184 ++++ .claude/commands/speckit.checklist.md | 294 +++++++ .claude/commands/speckit.clarify.md | 181 ++++ 
.claude/commands/speckit.constitution.md | 82 ++ .claude/commands/speckit.implement.md | 135 +++ .claude/commands/speckit.plan.md | 89 ++ .claude/commands/speckit.specify.md | 257 ++++++ .claude/commands/speckit.tasks.md | 137 +++ .claude/commands/speckit.taskstoissues.md | 28 + .specify/memory/constitution.md | 165 ++++ .specify/scripts/bash/check-prerequisites.sh | 166 ++++ .specify/scripts/bash/common.sh | 156 ++++ .specify/scripts/bash/create-new-feature.sh | 305 +++++++ .specify/scripts/bash/setup-plan.sh | 61 ++ .specify/scripts/bash/update-agent-context.sh | 790 ++++++++++++++++++ .specify/templates/agent-file-template.md | 28 + .specify/templates/checklist-template.md | 40 + .specify/templates/plan-template.md | 104 +++ .specify/templates/spec-template.md | 115 +++ .specify/templates/tasks-template.md | 251 ++++++ 20 files changed, 3568 insertions(+) create mode 100644 .claude/commands/speckit.analyze.md create mode 100644 .claude/commands/speckit.checklist.md create mode 100644 .claude/commands/speckit.clarify.md create mode 100644 .claude/commands/speckit.constitution.md create mode 100644 .claude/commands/speckit.implement.md create mode 100644 .claude/commands/speckit.plan.md create mode 100644 .claude/commands/speckit.specify.md create mode 100644 .claude/commands/speckit.tasks.md create mode 100644 .claude/commands/speckit.taskstoissues.md create mode 100644 .specify/memory/constitution.md create mode 100755 .specify/scripts/bash/check-prerequisites.sh create mode 100755 .specify/scripts/bash/common.sh create mode 100755 .specify/scripts/bash/create-new-feature.sh create mode 100755 .specify/scripts/bash/setup-plan.sh create mode 100755 .specify/scripts/bash/update-agent-context.sh create mode 100644 .specify/templates/agent-file-template.md create mode 100644 .specify/templates/checklist-template.md create mode 100644 .specify/templates/plan-template.md create mode 100644 .specify/templates/spec-template.md create mode 100644 
.specify/templates/tasks-template.md diff --git a/.claude/commands/speckit.analyze.md b/.claude/commands/speckit.analyze.md new file mode 100644 index 0000000..98b04b0 --- /dev/null +++ b/.claude/commands/speckit.analyze.md @@ -0,0 +1,184 @@ +--- +description: Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation. +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Goal + +Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/speckit.tasks` has successfully produced a complete `tasks.md`. + +## Operating Constraints + +**STRICTLY READ-ONLY**: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually). + +**Constitution Authority**: The project constitution (`.specify/memory/constitution.md`) is **non-negotiable** within this analysis scope. Constitution conflicts are automatically CRITICAL and require adjustment of the spec, plan, or tasks—not dilution, reinterpretation, or silent ignoring of the principle. If a principle itself needs to change, that must occur in a separate, explicit constitution update outside `/speckit.analyze`. + +## Execution Steps + +### 1. Initialize Analysis Context + +Run `.specify/scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks` once from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS. Derive absolute paths: + +- SPEC = FEATURE_DIR/spec.md +- PLAN = FEATURE_DIR/plan.md +- TASKS = FEATURE_DIR/tasks.md + +Abort with an error message if any required file is missing (instruct the user to run missing prerequisite command). 
+For single quotes in args like "I'm Groot", use escape syntax: e.g., 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +### 2. Load Artifacts (Progressive Disclosure) + +Load only the minimal necessary context from each artifact: + +**From spec.md:** + +- Overview/Context +- Functional Requirements +- Non-Functional Requirements +- User Stories +- Edge Cases (if present) + +**From plan.md:** + +- Architecture/stack choices +- Data Model references +- Phases +- Technical constraints + +**From tasks.md:** + +- Task IDs +- Descriptions +- Phase grouping +- Parallel markers [P] +- Referenced file paths + +**From constitution:** + +- Load `.specify/memory/constitution.md` for principle validation + +### 3. Build Semantic Models + +Create internal representations (do not include raw artifacts in output): + +- **Requirements inventory**: Each functional + non-functional requirement with a stable key (derive slug based on imperative phrase; e.g., "User can upload file" → `user-can-upload-file`) +- **User story/action inventory**: Discrete user actions with acceptance criteria +- **Task coverage mapping**: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases) +- **Constitution rule set**: Extract principle names and MUST/SHOULD normative statements + +### 4. Detection Passes (Token-Efficient Analysis) + +Focus on high-signal findings. Limit to 50 findings total; aggregate remainder in overflow summary. + +#### A. Duplication Detection + +- Identify near-duplicate requirements +- Mark lower-quality phrasing for consolidation + +#### B. Ambiguity Detection + +- Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria +- Flag unresolved placeholders (TODO, TKTK, ???, `<placeholder>`, etc.) + +#### C. 
Underspecification + +- Requirements with verbs but missing object or measurable outcome +- User stories missing acceptance criteria alignment +- Tasks referencing files or components not defined in spec/plan + +#### D. Constitution Alignment + +- Any requirement or plan element conflicting with a MUST principle +- Missing mandated sections or quality gates from constitution + +#### E. Coverage Gaps + +- Requirements with zero associated tasks +- Tasks with no mapped requirement/story +- Non-functional requirements not reflected in tasks (e.g., performance, security) + +#### F. Inconsistency + +- Terminology drift (same concept named differently across files) +- Data entities referenced in plan but absent in spec (or vice versa) +- Task ordering contradictions (e.g., integration tasks before foundational setup tasks without dependency note) +- Conflicting requirements (e.g., one requires Next.js while other specifies Vue) + +### 5. Severity Assignment + +Use this heuristic to prioritize findings: + +- **CRITICAL**: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality +- **HIGH**: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion +- **MEDIUM**: Terminology drift, missing non-functional task coverage, underspecified edge case +- **LOW**: Style/wording improvements, minor redundancy not affecting execution order + +### 6. Produce Compact Analysis Report + +Output a Markdown report (no file writes) with the following structure: + +## Specification Analysis Report + +| ID | Category | Severity | Location(s) | Summary | Recommendation | +|----|----------|----------|-------------|---------|----------------| +| A1 | Duplication | HIGH | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version | + +(Add one row per finding; generate stable IDs prefixed by category initial.) 
+ +**Coverage Summary Table:** + +| Requirement Key | Has Task? | Task IDs | Notes | +|-----------------|-----------|----------|-------| + +**Constitution Alignment Issues:** (if any) + +**Unmapped Tasks:** (if any) + +**Metrics:** + +- Total Requirements +- Total Tasks +- Coverage % (requirements with >=1 task) +- Ambiguity Count +- Duplication Count +- Critical Issues Count + +### 7. Provide Next Actions + +At end of report, output a concise Next Actions block: + +- If CRITICAL issues exist: Recommend resolving before `/speckit.implement` +- If only LOW/MEDIUM: User may proceed, but provide improvement suggestions +- Provide explicit command suggestions: e.g., "Run /speckit.specify with refinement", "Run /speckit.plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'" + +### 8. Offer Remediation + +Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.) + +## Operating Principles + +### Context Efficiency + +- **Minimal high-signal tokens**: Focus on actionable findings, not exhaustive documentation +- **Progressive disclosure**: Load artifacts incrementally; don't dump all content into analysis +- **Token-efficient output**: Limit findings table to 50 rows; summarize overflow +- **Deterministic results**: Rerunning without changes should produce consistent IDs and counts + +### Analysis Guidelines + +- **NEVER modify files** (this is read-only analysis) +- **NEVER hallucinate missing sections** (if absent, report them accurately) +- **Prioritize constitution violations** (these are always CRITICAL) +- **Use examples over exhaustive rules** (cite specific instances, not generic patterns) +- **Report zero issues gracefully** (emit success report with coverage statistics) + +## Context + +$ARGUMENTS diff --git a/.claude/commands/speckit.checklist.md b/.claude/commands/speckit.checklist.md new file mode 100644 index 0000000..970e6c9 --- /dev/null +++ 
b/.claude/commands/speckit.checklist.md @@ -0,0 +1,294 @@ +--- +description: Generate a custom checklist for the current feature based on user requirements. +--- + +## Checklist Purpose: "Unit Tests for English" + +**CRITICAL CONCEPT**: Checklists are **UNIT TESTS FOR REQUIREMENTS WRITING** - they validate the quality, clarity, and completeness of requirements in a given domain. + +**NOT for verification/testing**: + +- ❌ NOT "Verify the button clicks correctly" +- ❌ NOT "Test error handling works" +- ❌ NOT "Confirm the API returns 200" +- ❌ NOT checking if code/implementation matches the spec + +**FOR requirements quality validation**: + +- ✅ "Are visual hierarchy requirements defined for all card types?" (completeness) +- ✅ "Is 'prominent display' quantified with specific sizing/positioning?" (clarity) +- ✅ "Are hover state requirements consistent across all interactive elements?" (consistency) +- ✅ "Are accessibility requirements defined for keyboard navigation?" (coverage) +- ✅ "Does the spec define what happens when logo image fails to load?" (edge cases) + +**Metaphor**: If your spec is code written in English, the checklist is its unit test suite. You're testing whether the requirements are well-written, complete, unambiguous, and ready for implementation - NOT whether the implementation works. + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Execution Steps + +1. **Setup**: Run `.specify/scripts/bash/check-prerequisites.sh --json` from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS list. + - All file paths must be absolute. + - For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. **Clarify intent (dynamic)**: Derive up to THREE initial contextual clarifying questions (no pre-baked catalog). 
They MUST: + - Be generated from the user's phrasing + extracted signals from spec/plan/tasks + - Only ask about information that materially changes checklist content + - Be skipped individually if already unambiguous in `$ARGUMENTS` + - Prefer precision over breadth + + Generation algorithm: + 1. Extract signals: feature domain keywords (e.g., auth, latency, UX, API), risk indicators ("critical", "must", "compliance"), stakeholder hints ("QA", "review", "security team"), and explicit deliverables ("a11y", "rollback", "contracts"). + 2. Cluster signals into candidate focus areas (max 4) ranked by relevance. + 3. Identify probable audience & timing (author, reviewer, QA, release) if not explicit. + 4. Detect missing dimensions: scope breadth, depth/rigor, risk emphasis, exclusion boundaries, measurable acceptance criteria. + 5. Formulate questions chosen from these archetypes: + - Scope refinement (e.g., "Should this include integration touchpoints with X and Y or stay limited to local module correctness?") + - Risk prioritization (e.g., "Which of these potential risk areas should receive mandatory gating checks?") + - Depth calibration (e.g., "Is this a lightweight pre-commit sanity list or a formal release gate?") + - Audience framing (e.g., "Will this be used by the author only or peers during PR review?") + - Boundary exclusion (e.g., "Should we explicitly exclude performance tuning items this round?") + - Scenario class gap (e.g., "No recovery flows detected—are rollback / partial failure paths in scope?") + + Question formatting rules: + - If presenting options, generate a compact table with columns: Option | Candidate | Why It Matters + - Limit to A–E options maximum; omit table if a free-form answer is clearer + - Never ask the user to restate what they already said + - Avoid speculative categories (no hallucination). If uncertain, ask explicitly: "Confirm whether X belongs in scope." 
+ + Defaults when interaction impossible: + - Depth: Standard + - Audience: Reviewer (PR) if code-related; Author otherwise + - Focus: Top 2 relevance clusters + + Output the questions (label Q1/Q2/Q3). After answers: if ≥2 scenario classes (Alternate / Exception / Recovery / Non-Functional domain) remain unclear, you MAY ask up to TWO more targeted follow‑ups (Q4/Q5) with a one-line justification each (e.g., "Unresolved recovery path risk"). Do not exceed five total questions. Skip escalation if user explicitly declines more. + +3. **Understand user request**: Combine `$ARGUMENTS` + clarifying answers: + - Derive checklist theme (e.g., security, review, deploy, ux) + - Consolidate explicit must-have items mentioned by user + - Map focus selections to category scaffolding + - Infer any missing context from spec/plan/tasks (do NOT hallucinate) + +4. **Load feature context**: Read from FEATURE_DIR: + - spec.md: Feature requirements and scope + - plan.md (if exists): Technical details, dependencies + - tasks.md (if exists): Implementation tasks + + **Context Loading Strategy**: + - Load only necessary portions relevant to active focus areas (avoid full-file dumping) + - Prefer summarizing long sections into concise scenario/requirement bullets + - Use progressive disclosure: add follow-on retrieval only if gaps detected + - If source docs are large, generate interim summary items instead of embedding raw text + +5. 
**Generate checklist** - Create "Unit Tests for Requirements": + - Create `FEATURE_DIR/checklists/` directory if it doesn't exist + - Generate unique checklist filename: + - Use short, descriptive name based on domain (e.g., `ux.md`, `api.md`, `security.md`) + - Format: `[domain].md` + - If file exists, append to existing file + - Number items sequentially starting from CHK001 + - Each `/speckit.checklist` run creates a NEW file for a new domain; when `[domain].md` already exists, append to it (never overwrite existing checklists) + + **CORE PRINCIPLE - Test the Requirements, Not the Implementation**: + Every checklist item MUST evaluate the REQUIREMENTS THEMSELVES for: + - **Completeness**: Are all necessary requirements present? + - **Clarity**: Are requirements unambiguous and specific? + - **Consistency**: Do requirements align with each other? + - **Measurability**: Can requirements be objectively verified? + - **Coverage**: Are all scenarios/edge cases addressed? + + **Category Structure** - Group items by requirement quality dimensions: + - **Requirement Completeness** (Are all necessary requirements documented?) + - **Requirement Clarity** (Are requirements specific and unambiguous?) + - **Requirement Consistency** (Do requirements align without conflicts?) + - **Acceptance Criteria Quality** (Are success criteria measurable?) + - **Scenario Coverage** (Are all flows/cases addressed?) + - **Edge Case Coverage** (Are boundary conditions defined?) + - **Non-Functional Requirements** (Performance, Security, Accessibility, etc. - are they specified?) + - **Dependencies & Assumptions** (Are they documented and validated?) + - **Ambiguities & Conflicts** (What needs clarification?) 
+ + **HOW TO WRITE CHECKLIST ITEMS - "Unit Tests for English"**: + + ❌ **WRONG** (Testing implementation): + - "Verify landing page displays 3 episode cards" + - "Test hover states work on desktop" + - "Confirm logo click navigates home" + + ✅ **CORRECT** (Testing requirements quality): + - "Are the exact number and layout of featured episodes specified?" [Completeness] + - "Is 'prominent display' quantified with specific sizing/positioning?" [Clarity] + - "Are hover state requirements consistent across all interactive elements?" [Consistency] + - "Are keyboard navigation requirements defined for all interactive UI?" [Coverage] + - "Is the fallback behavior specified when logo image fails to load?" [Edge Cases] + - "Are loading states defined for asynchronous episode data?" [Completeness] + - "Does the spec define visual hierarchy for competing UI elements?" [Clarity] + + **ITEM STRUCTURE**: + Each item should follow this pattern: + - Question format asking about requirement quality + - Focus on what's WRITTEN (or not written) in the spec/plan + - Include quality dimension in brackets [Completeness/Clarity/Consistency/etc.] + - Reference spec section `[Spec §X.Y]` when checking existing requirements + - Use `[Gap]` marker when checking for missing requirements + + **EXAMPLES BY QUALITY DIMENSION**: + + Completeness: + - "Are error handling requirements defined for all API failure modes? [Gap]" + - "Are accessibility requirements specified for all interactive elements? [Completeness]" + - "Are mobile breakpoint requirements defined for responsive layouts? [Gap]" + + Clarity: + - "Is 'fast loading' quantified with specific timing thresholds? [Clarity, Spec §NFR-2]" + - "Are 'related episodes' selection criteria explicitly defined? [Clarity, Spec §FR-5]" + - "Is 'prominent' defined with measurable visual properties? [Ambiguity, Spec §FR-4]" + + Consistency: + - "Do navigation requirements align across all pages? 
[Consistency, Spec §FR-10]" + - "Are card component requirements consistent between landing and detail pages? [Consistency]" + + Coverage: + - "Are requirements defined for zero-state scenarios (no episodes)? [Coverage, Edge Case]" + - "Are concurrent user interaction scenarios addressed? [Coverage, Gap]" + - "Are requirements specified for partial data loading failures? [Coverage, Exception Flow]" + + Measurability: + - "Are visual hierarchy requirements measurable/testable? [Acceptance Criteria, Spec §FR-1]" + - "Can 'balanced visual weight' be objectively verified? [Measurability, Spec §FR-2]" + + **Scenario Classification & Coverage** (Requirements Quality Focus): + - Check if requirements exist for: Primary, Alternate, Exception/Error, Recovery, Non-Functional scenarios + - For each scenario class, ask: "Are [scenario type] requirements complete, clear, and consistent?" + - If scenario class missing: "Are [scenario type] requirements intentionally excluded or missing? [Gap]" + - Include resilience/rollback when state mutation occurs: "Are rollback requirements defined for migration failures? [Gap]" + + **Traceability Requirements**: + - MINIMUM: ≥80% of items MUST include at least one traceability reference + - Each item should reference: spec section `[Spec §X.Y]`, or use markers: `[Gap]`, `[Ambiguity]`, `[Conflict]`, `[Assumption]` + - If no ID system exists: "Is a requirement & acceptance criteria ID scheme established? [Traceability]" + + **Surface & Resolve Issues** (Requirements Quality Problems): + Ask questions about the requirements themselves: + - Ambiguities: "Is the term 'fast' quantified with specific metrics? [Ambiguity, Spec §NFR-1]" + - Conflicts: "Do navigation requirements conflict between §FR-10 and §FR-10a? [Conflict]" + - Assumptions: "Is the assumption of 'always available podcast API' validated? [Assumption]" + - Dependencies: "Are external podcast API requirements documented? 
[Dependency, Gap]" + - Missing definitions: "Is 'visual hierarchy' defined with measurable criteria? [Gap]" + + **Content Consolidation**: + - Soft cap: If raw candidate items > 40, prioritize by risk/impact + - Merge near-duplicates checking the same requirement aspect + - If >5 low-impact edge cases, create one item: "Are edge cases X, Y, Z addressed in requirements? [Coverage]" + + **🚫 ABSOLUTELY PROHIBITED** - These make it an implementation test, not a requirements test: + - ❌ Any item starting with "Verify", "Test", "Confirm", "Check" + implementation behavior + - ❌ References to code execution, user actions, system behavior + - ❌ "Displays correctly", "works properly", "functions as expected" + - ❌ "Click", "navigate", "render", "load", "execute" + - ❌ Test cases, test plans, QA procedures + - ❌ Implementation details (frameworks, APIs, algorithms) + + **✅ REQUIRED PATTERNS** - These test requirements quality: + - ✅ "Are [requirement type] defined/specified/documented for [scenario]?" + - ✅ "Is [vague term] quantified/clarified with specific criteria?" + - ✅ "Are requirements consistent between [section A] and [section B]?" + - ✅ "Can [requirement] be objectively measured/verified?" + - ✅ "Are [edge cases/scenarios] addressed in requirements?" + - ✅ "Does the spec define [missing aspect]?" + +6. **Structure Reference**: Generate the checklist following the canonical template in `.specify/templates/checklist-template.md` for title, meta section, category headings, and ID formatting. If template is unavailable, use: H1 title, purpose/created meta lines, `##` category sections containing `- [ ] CHK### ` lines with globally incrementing IDs starting at CHK001. + +7. **Report**: Output full path to created checklist, item count, and remind user that each run creates a new file. 
Summarize: + - Focus areas selected + - Depth level + - Actor/timing + - Any explicit user-specified must-have items incorporated + +**Important**: Each `/speckit.checklist` command invocation creates a checklist file using short, descriptive names unless file already exists. This allows: + +- Multiple checklists of different types (e.g., `ux.md`, `test.md`, `security.md`) +- Simple, memorable filenames that indicate checklist purpose +- Easy identification and navigation in the `checklists/` folder + +To avoid clutter, use descriptive types and clean up obsolete checklists when done. + +## Example Checklist Types & Sample Items + +**UX Requirements Quality:** `ux.md` + +Sample items (testing the requirements, NOT the implementation): + +- "Are visual hierarchy requirements defined with measurable criteria? [Clarity, Spec §FR-1]" +- "Is the number and positioning of UI elements explicitly specified? [Completeness, Spec §FR-1]" +- "Are interaction state requirements (hover, focus, active) consistently defined? [Consistency]" +- "Are accessibility requirements specified for all interactive elements? [Coverage, Gap]" +- "Is fallback behavior defined when images fail to load? [Edge Case, Gap]" +- "Can 'prominent display' be objectively measured? [Measurability, Spec §FR-4]" + +**API Requirements Quality:** `api.md` + +Sample items: + +- "Are error response formats specified for all failure scenarios? [Completeness]" +- "Are rate limiting requirements quantified with specific thresholds? [Clarity]" +- "Are authentication requirements consistent across all endpoints? [Consistency]" +- "Are retry/timeout requirements defined for external dependencies? [Coverage, Gap]" +- "Is versioning strategy documented in requirements? [Gap]" + +**Performance Requirements Quality:** `performance.md` + +Sample items: + +- "Are performance requirements quantified with specific metrics? [Clarity]" +- "Are performance targets defined for all critical user journeys? 
[Coverage]" +- "Are performance requirements under different load conditions specified? [Completeness]" +- "Can performance requirements be objectively measured? [Measurability]" +- "Are degradation requirements defined for high-load scenarios? [Edge Case, Gap]" + +**Security Requirements Quality:** `security.md` + +Sample items: + +- "Are authentication requirements specified for all protected resources? [Coverage]" +- "Are data protection requirements defined for sensitive information? [Completeness]" +- "Is the threat model documented and requirements aligned to it? [Traceability]" +- "Are security requirements consistent with compliance obligations? [Consistency]" +- "Are security failure/breach response requirements defined? [Gap, Exception Flow]" + +## Anti-Examples: What NOT To Do + +**❌ WRONG - These test implementation, not requirements:** + +```markdown +- [ ] CHK001 - Verify landing page displays 3 episode cards [Spec §FR-001] +- [ ] CHK002 - Test hover states work correctly on desktop [Spec §FR-003] +- [ ] CHK003 - Confirm logo click navigates to home page [Spec §FR-010] +- [ ] CHK004 - Check that related episodes section shows 3-5 items [Spec §FR-005] +``` + +**✅ CORRECT - These test requirements quality:** + +```markdown +- [ ] CHK001 - Are the number and layout of featured episodes explicitly specified? [Completeness, Spec §FR-001] +- [ ] CHK002 - Are hover state requirements consistently defined for all interactive elements? [Consistency, Spec §FR-003] +- [ ] CHK003 - Are navigation requirements clear for all clickable brand elements? [Clarity, Spec §FR-010] +- [ ] CHK004 - Is the selection criteria for related episodes documented? [Gap, Spec §FR-005] +- [ ] CHK005 - Are loading state requirements defined for asynchronous episode data? [Gap] +- [ ] CHK006 - Can "visual hierarchy" requirements be objectively measured? 
[Measurability, Spec §FR-001] +``` + +**Key Differences:** + +- Wrong: Tests if the system works correctly +- Correct: Tests if the requirements are written correctly +- Wrong: Verification of behavior +- Correct: Validation of requirement quality +- Wrong: "Does it do X?" +- Correct: "Is X clearly specified?" diff --git a/.claude/commands/speckit.clarify.md b/.claude/commands/speckit.clarify.md new file mode 100644 index 0000000..6b28dae --- /dev/null +++ b/.claude/commands/speckit.clarify.md @@ -0,0 +1,181 @@ +--- +description: Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding answers back into the spec. +handoffs: + - label: Build Technical Plan + agent: speckit.plan + prompt: Create a plan for the spec. I am building with... +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Outline + +Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file. + +Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/speckit.plan`. If the user explicitly states they are skipping clarification (e.g., exploratory spike), you may proceed, but must warn that downstream rework risk increases. + +Execution steps: + +1. Run `.specify/scripts/bash/check-prerequisites.sh --json --paths-only` from repo root **once** (combined `--json --paths-only` mode / `-Json -PathsOnly`). Parse minimal JSON payload fields: + - `FEATURE_DIR` + - `FEATURE_SPEC` + - (Optionally capture `IMPL_PLAN`, `TASKS` for future chained flows.) + - If JSON parsing fails, abort and instruct user to re-run `/speckit.specify` or verify feature branch environment. + - For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. Load the current spec file. 
Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). + + Functional Scope & Behavior: + - Core user goals & success criteria + - Explicit out-of-scope declarations + - User roles / personas differentiation + + Domain & Data Model: + - Entities, attributes, relationships + - Identity & uniqueness rules + - Lifecycle/state transitions + - Data volume / scale assumptions + + Interaction & UX Flow: + - Critical user journeys / sequences + - Error/empty/loading states + - Accessibility or localization notes + + Non-Functional Quality Attributes: + - Performance (latency, throughput targets) + - Scalability (horizontal/vertical, limits) + - Reliability & availability (uptime, recovery expectations) + - Observability (logging, metrics, tracing signals) + - Security & privacy (authN/Z, data protection, threat assumptions) + - Compliance / regulatory constraints (if any) + + Integration & External Dependencies: + - External services/APIs and failure modes + - Data import/export formats + - Protocol/versioning assumptions + + Edge Cases & Failure Handling: + - Negative scenarios + - Rate limiting / throttling + - Conflict resolution (e.g., concurrent edits) + + Constraints & Tradeoffs: + - Technical constraints (language, storage, hosting) + - Explicit tradeoffs or rejected alternatives + + Terminology & Consistency: + - Canonical glossary terms + - Avoided synonyms / deprecated terms + + Completion Signals: + - Acceptance criteria testability + - Measurable Definition of Done style indicators + + Misc / Placeholders: + - TODO markers / unresolved decisions + - Ambiguous adjectives ("robust", "intuitive") lacking quantification + + For each category with Partial or Missing status, add a candidate question opportunity unless: + - Clarification would not materially change implementation or 
validation strategy + - Information is better deferred to planning phase (note internally) + +3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: + - Maximum of 5 total questions across the whole session. + - Each question must be answerable with EITHER: + - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR + - A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words"). + - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation. + - Ensure category coverage balance: attempt to cover the highest impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved. + - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless blocking correctness). + - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. + - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. + +4. Sequential questioning loop (interactive): + - Present EXACTLY ONE question at a time. + - For multiple‑choice questions: + - **Analyze all options** and determine the **most suitable option** based on: + - Best practices for the project type + - Common patterns in similar implementations + - Risk reduction (security, performance, maintainability) + - Alignment with any explicit project goals or constraints visible in the spec + - Present your **recommended option prominently** at the top with clear reasoning (1-2 sentences explaining why this is the best choice). + - Format as: `**Recommended:** Option [X] - <reasoning>` + - Then render all options as a Markdown table: + + | Option | Description | + |--------|-------------| + | A |