diff --git a/.github/workflows/python-data-charts.lock.yml b/.github/workflows/python-data-charts.lock.yml index 82358293133..07d5053371e 100644 --- a/.github/workflows/python-data-charts.lock.yml +++ b/.github/workflows/python-data-charts.lock.yml @@ -5,7 +5,9 @@ # # Resolved workflow manifest: # Imports: +# - shared/charts-with-trending.md # - shared/python-dataviz.md +# - shared/trends.md # # Job Dependency Graph: # ```mermaid @@ -1170,6 +1172,397 @@ jobs: mkdir -p "$PROMPT_DIR" # shellcheck disable=SC2006,SC2287 cat > "$GH_AW_PROMPT" << 'PROMPT_EOF' + # Charts with Trending - Complete Guide + + This shared workflow provides everything you need to create compelling trend visualizations with persistent data storage. + + ## Cache-Memory for Trending Data + + You have access to persistent cache-memory at `/tmp/gh-aw/cache-memory/` that survives across workflow runs. Use it to store historical trending data. + + ### Trending Data Organization + + Organize your trending data in cache-memory: + + ``` + /tmp/gh-aw/cache-memory/ + ├── trending/ + │ ├── <metric-name>/ + │ │ ├── history.jsonl # Time-series data (JSON Lines format) + │ │ ├── metadata.json # Data schema and descriptions + │ │ └── last_updated.txt # Timestamp of last update + │ └── index.json # Index of all tracked metrics + ``` + + ### Helper Functions for Trending Data + + **Load Historical Data:** + ```bash + # Check if historical data exists + if [ -f /tmp/gh-aw/cache-memory/trending/issues/history.jsonl ]; then + echo "Loading historical issue trending data..." + cp /tmp/gh-aw/cache-memory/trending/issues/history.jsonl /tmp/gh-aw/python/data/ + else + echo "No historical data found. Starting fresh." 
+ mkdir -p /tmp/gh-aw/cache-memory/trending/issues + fi + ``` + + **Append New Data:** + ```python + import json + from datetime import datetime + + # New data point + data_point = { + "timestamp": datetime.now().isoformat(), + "metric": "issue_count", + "value": 42, + "metadata": {"source": "github_api"} + } + + # Append to history (JSON Lines format) + with open('/tmp/gh-aw/cache-memory/trending/issues/history.jsonl', 'a') as f: + f.write(json.dumps(data_point) + '\n') + ``` + + **Load All Historical Data for Analysis:** + ```python + import pandas as pd + import json + import os + + # Load all historical data + data_points = [] + history_file = '/tmp/gh-aw/cache-memory/trending/issues/history.jsonl' + + if os.path.exists(history_file): + with open(history_file, 'r') as f: + for line in f: + data_points.append(json.loads(line)) + + # Convert to DataFrame for analysis + df = pd.DataFrame(data_points) + df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.sort_values('timestamp') + else: + df = pd.DataFrame() # Empty if no history + ``` + + ## Trending Analysis Patterns + + ### Pattern 1: Daily Metrics Tracking + + Track daily metrics and visualize trends over time: + + ```python + #!/usr/bin/env python3 + """ + Daily metrics trending example + """ + import pandas as pd + import matplotlib.pyplot as plt + import seaborn as sns + import json + import os + from datetime import datetime + + # Set style + sns.set_style("whitegrid") + sns.set_palette("husl") + + # Load historical data + history_file = '/tmp/gh-aw/cache-memory/trending/daily_metrics/history.jsonl' + if os.path.exists(history_file): + data = pd.read_json(history_file, lines=True) + data['date'] = pd.to_datetime(data['timestamp']).dt.date + else: + data = pd.DataFrame() + + # Add today's data + today_data = { + "timestamp": datetime.now().isoformat(), + "issues_opened": 5, + "issues_closed": 3, + "prs_merged": 2 + } + + # Append to history + os.makedirs(os.path.dirname(history_file), exist_ok=True) + with 
open(history_file, 'a') as f: + f.write(json.dumps(today_data) + '\n') + + # Reload with today's data + data = pd.read_json(history_file, lines=True) + data['date'] = pd.to_datetime(data['timestamp']).dt.date + daily_stats = data.groupby('date').sum() + + # Create trend chart + fig, ax = plt.subplots(figsize=(12, 7), dpi=300) + daily_stats.plot(ax=ax, marker='o', linewidth=2) + ax.set_title('Daily Metrics Trends', fontsize=16, fontweight='bold') + ax.set_xlabel('Date', fontsize=12) + ax.set_ylabel('Count', fontsize=12) + ax.legend(loc='best') + ax.grid(True, alpha=0.3) + plt.xticks(rotation=45) + plt.tight_layout() + + plt.savefig('/tmp/gh-aw/python/charts/daily_metrics_trend.png', + dpi=300, bbox_inches='tight', facecolor='white') + + print(f"Chart saved. Total data points: {len(data)}") + ``` + + ### Pattern 2: Moving Averages and Smoothing + + ```python + # Calculate 7-day moving average + df['rolling_avg'] = df['value'].rolling(window=7, min_periods=1).mean() + + # Plot with trend line + fig, ax = plt.subplots(figsize=(12, 7), dpi=300) + ax.plot(df['date'], df['value'], label='Actual', alpha=0.5, marker='o') + ax.plot(df['date'], df['rolling_avg'], label='7-day Average', linewidth=2.5) + ax.fill_between(df['date'], df['value'], df['rolling_avg'], alpha=0.2) + ``` + + ### Pattern 3: Comparative Trends + + ```python + # Compare multiple metrics over time + fig, ax = plt.subplots(figsize=(14, 8), dpi=300) + + for metric in ['metric_a', 'metric_b', 'metric_c']: + metric_data = df[df['metric'] == metric] + ax.plot(metric_data['timestamp'], metric_data['value'], + marker='o', label=metric, linewidth=2) + + ax.set_title('Comparative Metrics Trends', fontsize=16, fontweight='bold') + ax.legend(loc='best', fontsize=12) + ax.grid(True, alpha=0.3) + plt.xticks(rotation=45) + ``` + + ## Best Practices for Cache-Memory Trending + + ### 1. 
Use JSON Lines Format + + JSON Lines (`.jsonl`) is ideal for append-only trending data: + - One JSON object per line + - Easy to append new data + - Efficient for time-series data + - Simple to load with pandas: `pd.read_json(file, lines=True)` + + ### 2. Include Metadata + + Store metadata alongside data: + ```json + { + "metric_name": "issue_resolution_time", + "unit": "hours", + "description": "Average time to close issues", + "started_tracking": "2024-01-01", + "updated": "2024-03-15" + } + ``` + + ### 3. Maintain Index + + Keep an index of all tracked metrics: + ```json + { + "metrics": [ + "issue_count", + "pr_count", + "commit_count", + "test_coverage" + ], + "last_updated": "2024-03-15T10:30:00Z" + } + ``` + + ### 4. Data Retention Strategy + + Implement retention policies to prevent unbounded growth: + ```python + # Keep only last 90 days + cutoff_date = datetime.now() - timedelta(days=90) + df = df[df['timestamp'] >= cutoff_date] + + # Save pruned data + df.to_json('/tmp/gh-aw/cache-memory/trending/history.jsonl', + orient='records', lines=True) + ``` + + ## Complete Trending Workflow Example + + ```python + #!/usr/bin/env python3 + """ + Complete trending analysis workflow + Collects data, updates history, generates trend charts + """ + import pandas as pd + import matplotlib.pyplot as plt + import seaborn as sns + import json + import os + from datetime import datetime, timedelta + + # Configuration + CACHE_DIR = '/tmp/gh-aw/cache-memory/trending' + METRIC_NAME = 'github_activity' + HISTORY_FILE = f'{CACHE_DIR}/{METRIC_NAME}/history.jsonl' + CHARTS_DIR = '/tmp/gh-aw/python/charts' + + # Ensure directories exist + os.makedirs(f'{CACHE_DIR}/{METRIC_NAME}', exist_ok=True) + os.makedirs(CHARTS_DIR, exist_ok=True) + + # Collect today's data (example) + today_data = { + "timestamp": datetime.now().isoformat(), + "issues_opened": 8, + "prs_merged": 12, + "commits": 45, + "contributors": 6 + } + + # Append to history + with open(HISTORY_FILE, 'a') as f: + 
f.write(json.dumps(today_data) + '\n') + + # Load all historical data + df = pd.read_json(HISTORY_FILE, lines=True) + df['date'] = pd.to_datetime(df['timestamp']).dt.date + df = df.sort_values('timestamp') + + # Aggregate by date + daily_stats = df.groupby('date').sum() + + # Generate trend chart + sns.set_style("whitegrid") + sns.set_palette("husl") + + fig, axes = plt.subplots(2, 2, figsize=(16, 12), dpi=300) + fig.suptitle('GitHub Activity Trends', fontsize=18, fontweight='bold') + + # Chart 1: Issues Opened + axes[0, 0].plot(daily_stats.index, daily_stats['issues_opened'], + marker='o', linewidth=2, color='#FF6B6B') + axes[0, 0].set_title('Issues Opened', fontsize=14) + axes[0, 0].grid(True, alpha=0.3) + + # Chart 2: PRs Merged + axes[0, 1].plot(daily_stats.index, daily_stats['prs_merged'], + marker='s', linewidth=2, color='#4ECDC4') + axes[0, 1].set_title('PRs Merged', fontsize=14) + axes[0, 1].grid(True, alpha=0.3) + + # Chart 3: Commits + axes[1, 0].plot(daily_stats.index, daily_stats['commits'], + marker='^', linewidth=2, color='#45B7D1') + axes[1, 0].set_title('Commits', fontsize=14) + axes[1, 0].grid(True, alpha=0.3) + + # Chart 4: Contributors + axes[1, 1].plot(daily_stats.index, daily_stats['contributors'], + marker='D', linewidth=2, color='#FFA07A') + axes[1, 1].set_title('Active Contributors', fontsize=14) + axes[1, 1].grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig(f'{CHARTS_DIR}/activity_trends.png', + dpi=300, bbox_inches='tight', facecolor='white') + + print(f"✅ Trend chart generated with {len(df)} data points") + print(f"📊 Chart saved to: {CHARTS_DIR}/activity_trends.png") + print(f"💾 Historical data: {HISTORY_FILE}") + ``` + + ## Integration with Asset Upload and Discussions + + After generating charts, use the safe-outputs tools to share them: + + ```markdown + ## Example Discussion with Trending Charts + + Upload each chart using the `upload asset` tool, then create a discussion: + + **Title**: "📈 Weekly Trending Analysis - [Date]" 
+ + **Content**: + # 📈 Trending Analysis Report + + Generated on: {date} + + ## Activity Trends + + ![Activity Trends](URL_FROM_UPLOAD_ASSET) + + Analysis shows: + - Issues opened: Up 15% from last week + - PR velocity: Stable at 12 PRs/day + - Commit activity: Peak on Tuesdays and Wednesdays + - Active contributors: Growing trend (+20% this month) + + ## Data Summary + + - **Total data points**: {count} + - **Date range**: {start} to {end} + - **Tracking period**: {days} days + + --- + + *Generated using Charts with Trending shared workflow* + *Historical data stored in cache-memory for continuous tracking* + ``` + + ## Tips for Success + + 1. **Consistency**: Use same metric names across runs + 2. **Timestamps**: Always include ISO 8601 timestamps + 3. **Validation**: Check data quality before appending + 4. **Backup**: Keep metadata for data recovery + 5. **Documentation**: Comment your data schemas + 6. **Testing**: Validate charts before uploading + 7. **Cleanup**: Implement retention policies + 8. **Indexing**: Maintain metric index for discovery + + ## Common Use Cases + + ### Repository Activity Trends + ```python + # Track: commits, PRs, issues, contributors + # Frequency: Daily + # Retention: 90 days + ``` + + ### Performance Metrics Trends + ```python + # Track: build time, test coverage, bundle size + # Frequency: Per commit/PR + # Retention: 180 days + ``` + + ### Quality Metrics Trends + ```python + # Track: code complexity, test failures, security alerts + # Frequency: Weekly + # Retention: 1 year + ``` + + ### Workflow Efficiency Trends + ```python + # Track: workflow duration, token usage, success rate + # Frequency: Per run + # Retention: 30 days + ``` + + --- + + Remember: The power of trending comes from consistent data collection over time. Use cache-memory to build a rich historical dataset that reveals insights and patterns! + # Python Data Visualization Guide Python scientific libraries have been installed and are ready for use. 
A temporary folder structure has been created at `/tmp/gh-aw/python/` for organizing scripts, data, and outputs. @@ -1339,6 +1732,13 @@ jobs: bbox_inches='tight', facecolor='white') + PROMPT_EOF + - name: Append prompt (part 2) + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: | + # shellcheck disable=SC2006,SC2287 + cat >> "$GH_AW_PROMPT" << 'PROMPT_EOF' print("Chart saved to /tmp/gh-aw/python/charts/chart.png") ``` @@ -1418,13 +1818,185 @@ jobs: data = pd.read_csv('/tmp/gh-aw/python/data/sample_data.csv') ``` + # Trends Visualization Guide + + You are an expert at creating compelling trend visualizations that reveal insights from data over time. + + ## Trending Chart Best Practices + + When generating trending charts, focus on: + + ### 1. **Time Series Excellence** + - Use line charts for continuous trends over time + - Add trend lines or moving averages to highlight patterns + - Include clear date/time labels on the x-axis + - Show confidence intervals or error bands when relevant + + ### 2. **Comparative Trends** + - Use multi-line charts to compare multiple trends + - Apply distinct colors for each series with a clear legend + - Consider using area charts for stacked trends + - Highlight key inflection points or anomalies + + ### 3. **Visual Impact** + - Use vibrant, contrasting colors to make trends stand out + - Add annotations for significant events or milestones + - Include grid lines for easier value reading + - Use appropriate scale (linear vs. logarithmic) + + ### 4. **Contextual Information** + - Show percentage changes or growth rates + - Include baseline comparisons (year-over-year, month-over-month) + - Add summary statistics (min, max, average, median) + - Highlight recent trends vs. 
historical patterns + + ## Example Trend Chart Types + + ### Temporal Trends + ```python + # Line chart with multiple trends + fig, ax = plt.subplots(figsize=(12, 7), dpi=300) + for column in data.columns: + ax.plot(data.index, data[column], marker='o', label=column, linewidth=2) + ax.set_title('Trends Over Time', fontsize=16, fontweight='bold') + ax.set_xlabel('Date', fontsize=12) + ax.set_ylabel('Value', fontsize=12) + ax.legend(loc='best') + ax.grid(True, alpha=0.3) + plt.xticks(rotation=45) + ``` + + ### Growth Rates + ```python + # Bar chart showing period-over-period growth + fig, ax = plt.subplots(figsize=(10, 6), dpi=300) + growth_data.plot(kind='bar', ax=ax, color=sns.color_palette("husl")) + ax.set_title('Growth Rates by Period', fontsize=16, fontweight='bold') + ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8) + ax.set_ylabel('Growth %', fontsize=12) + ``` + + ### Moving Averages + ```python + # Trend with moving average overlay + fig, ax = plt.subplots(figsize=(12, 7), dpi=300) + ax.plot(dates, values, label='Actual', alpha=0.5, linewidth=1) + ax.plot(dates, moving_avg, label='7-day Moving Average', linewidth=2.5) + ax.fill_between(dates, values, moving_avg, alpha=0.2) + ``` + + ## Data Preparation for Trends + + ### Time-Based Indexing + ```python + # Convert to datetime and set as index + data['date'] = pd.to_datetime(data['date']) + data.set_index('date', inplace=True) + data = data.sort_index() + ``` + + ### Resampling and Aggregation + ```python + # Resample daily data to weekly + weekly_data = data.resample('W').mean() + + # Calculate rolling statistics + data['rolling_mean'] = data['value'].rolling(window=7).mean() + data['rolling_std'] = data['value'].rolling(window=7).std() + ``` + + ### Growth Calculations + ```python + # Calculate percentage change + data['pct_change'] = data['value'].pct_change() * 100 + + # Calculate year-over-year growth + data['yoy_growth'] = data['value'].pct_change(periods=365) * 100 + ``` + + ## Color 
Palettes for Trends + + Use these palettes for impactful trend visualizations: + + - **Sequential trends**: `sns.color_palette("viridis", n_colors=5)` + - **Diverging trends**: `sns.color_palette("RdYlGn", n_colors=7)` + - **Multiple series**: `sns.color_palette("husl", n_colors=8)` + - **Categorical**: `sns.color_palette("Set2", n_colors=6)` + + ## Annotation Best Practices + + ```python + # Annotate key points + max_idx = data['value'].idxmax() + max_val = data['value'].max() + ax.annotate(f'Peak: {max_val:.2f}', + xy=(max_idx, max_val), + xytext=(10, 20), + textcoords='offset points', + arrowprops=dict(arrowstyle='->', color='red'), + fontsize=10, + fontweight='bold') + ``` + + ## Styling for Awesome Charts + + ```python + import matplotlib.pyplot as plt + import seaborn as sns + + # Set professional style + sns.set_style("whitegrid") + sns.set_context("notebook", font_scale=1.2) + + # Custom color palette + custom_colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#FFA07A", "#98D8C8"] + sns.set_palette(custom_colors) + + # Figure with optimal dimensions + fig, ax = plt.subplots(figsize=(14, 8), dpi=300) + + # ... your plotting code ... + + # Tight layout for clean appearance + plt.tight_layout() + + # Save with high quality + plt.savefig('/tmp/gh-aw/python/charts/trend_chart.png', + dpi=300, + bbox_inches='tight', + facecolor='white', + edgecolor='none') + ``` + + ## Tips for Trending Charts + + 1. **Start with the story**: What trend are you trying to show? + 2. **Choose the right timeframe**: Match granularity to the pattern + 3. **Smooth noise**: Use moving averages for volatile data + 4. **Show context**: Include historical baselines or benchmarks + 5. **Highlight insights**: Use annotations to draw attention + 6. **Test readability**: Ensure labels and legends are clear + 7. **Optimize colors**: Use colorblind-friendly palettes + 8. 
**Export high quality**: Always use DPI 300+ for presentations + + ## Common Trend Patterns to Visualize + + - **Seasonal patterns**: Monthly or quarterly cycles + - **Long-term growth**: Exponential or linear trends + - **Volatility changes**: Periods of stability vs. fluctuation + - **Correlations**: How multiple trends relate + - **Anomalies**: Outliers or unusual events + - **Forecasts**: Projected future trends with uncertainty + + Remember: The best trending charts tell a clear story, make patterns obvious, and inspire action based on the insights revealed. + # Python Data Visualization Generator - You are a data visualization expert specializing in Python-based chart generation using scientific computing libraries. + You are a data visualization expert specializing in Python-based chart generation using scientific computing libraries with trending analysis capabilities. ## Mission - Generate high-quality data visualizations with random sample data, upload charts as assets, and create a discussion with embedded images. + Generate high-quality data visualizations with sample data, track trending metrics using cache-memory, upload charts as assets, and create a discussion with embedded images. ## Current Context @@ -1438,58 +2010,77 @@ jobs: - **Working Directory**: `/tmp/gh-aw/python/` - **Data Directory**: `/tmp/gh-aw/python/data/` - **Charts Directory**: `/tmp/gh-aw/python/charts/` - - **Cache Memory**: `/tmp/gh-aw/cache-memory/` + - **Cache Memory**: `/tmp/gh-aw/cache-memory/` (for trending data persistence) - See the shared Python Data Visualization Guide (imported above) for detailed usage instructions, best practices, and examples. + See the Charts with Trending Guide (imported above) for detailed usage instructions, best practices, trending patterns, and complete examples. ## Task Overview - ### Phase 1: Generate Sample Data + ### Phase 1: Check Cache for Historical Data + + 1. Check `/tmp/gh-aw/cache-memory/trending/` for existing trending data + 2. 
Load any historical metrics to show trend progression + 3. Document what historical data exists (if any) - 1. Generate random sample data using NumPy with interesting patterns (e.g., trends, distributions, correlations) - 2. Save the data to `/tmp/gh-aw/python/data/` as CSV files - 3. Document the data generation process + ### Phase 2: Generate or Collect Sample Data - ### Phase 2: Create Visualizations + 1. Generate new sample data using NumPy with interesting patterns OR + 2. Collect actual metrics from the repository using GitHub API + 3. Save the data to `/tmp/gh-aw/python/data/` as CSV or JSON files + 4. Document the data generation/collection process - 1. Create multiple chart types to showcase the data: - - Bar chart - - Line chart - - Scatter plot - - Distribution plot + ### Phase 3: Update Cache with New Data - 2. Save all charts to `/tmp/gh-aw/python/charts/` with descriptive filenames + 1. Append new data points to `/tmp/gh-aw/cache-memory/trending/<metric-name>/history.jsonl` + 2. Use JSON Lines format (one JSON object per line) + 3. Include timestamp, metric name, value, and metadata + 4. Create the directory structure if it doesn't exist - 3. Ensure high quality settings (DPI 300, clear labels, seaborn styling) + ### Phase 4: Create Trending Visualizations - ### Phase 3: Upload Charts as Assets + 1. Create trend charts showing data over time (if historical data exists): + - Time-series line charts with multiple metrics + - Moving averages to show smoothed trends + - Comparative trend analysis + + 2. Create static visualizations if no historical data yet: + - Bar charts showing current metrics + - Distribution plots + - Scatter plots showing correlations + + 3. Save all charts to `/tmp/gh-aw/python/charts/` with descriptive filenames + + 4. Ensure high quality settings (DPI 300, clear labels, seaborn styling) + + ### Phase 5: Upload Charts as Assets 1. Upload each generated chart using the `upload asset` tool 2. Collect the returned URLs for each chart 3. 
The assets will be published to an orphaned git branch - ### Phase 4: Create Discussion Report + ### Phase 6: Create Discussion Report Create a discussion with the following structure, including the uploaded chart images: - **Title**: "📊 Data Visualization Report - Random Sample Data" + **Title**: "📊 Data Visualization Report - Trending Analysis" **Content**: ```markdown - # 📊 Data Visualization Report + # 📊 Data Visualization & Trending Report Generated on: [current date] ## Summary - This report contains data visualizations generated from randomly generated sample data using Python scientific computing libraries. + This report contains data visualizations and trending analysis generated using Python scientific computing libraries with persistent cache-memory for historical tracking. - ## Generated Visualizations + ## Trending Metrics - ### Chart 1: [Chart Type] - ![Chart 1 Description](URL_FROM_UPLOAD_ASSET) + ![Trending Chart 1](URL_FROM_UPLOAD_ASSET) - [Brief description of what this chart shows] + [Analysis of trends shown: progression over time, moving averages, notable patterns] + + ## Additional Visualizations ### Chart 2: [Chart Type] ![Chart 2 Description](URL_FROM_UPLOAD_ASSET) @@ -1501,17 +2092,20 @@ jobs: [Brief description of what this chart shows] - ### Chart 4: [Chart Type] - ![Chart 4 Description](URL_FROM_UPLOAD_ASSET) - - [Brief description of what this chart shows] - ## Data Information - - **Data Generation**: Random sample data using NumPy + - **Data Source**: [Random sample / GitHub API / Other] - **Sample Size**: [number of data points] - **Variables**: [list of variables/columns] - **Patterns**: [describe any patterns in the data] + - **Historical Data Points**: [count if trending data exists] + - **Tracking Period**: [date range if historical data exists] + + ## Cache Memory Status + + - **Cache Location**: `/tmp/gh-aw/cache-memory/trending/` + - **Metrics Tracked**: [list of metrics being tracked] + - **Persistence**: Data persists across 
workflow runs via GitHub Actions cache ## Libraries Used @@ -1530,16 +2124,20 @@ jobs: --- *This report was automatically generated by the Python Data Visualization Generator workflow.* + *Historical trending data is stored in cache-memory for continuous analysis across runs.* ``` ## Key Reminders - - ✅ **Generate Random Data**: Use NumPy to create interesting sample data + - ✅ **Check Cache First**: Look for historical trending data in `/tmp/gh-aw/cache-memory/trending/` + - ✅ **Append to History**: Add new data points using JSON Lines format + - ✅ **Create Trends**: Generate trend charts if historical data exists - ✅ **Upload Charts**: Use the `upload asset` tool for each chart - ✅ **Embed Images**: Include uploaded chart URLs in the markdown discussion - ✅ **High Quality**: Use DPI 300, clear labels, and seaborn styling + - ✅ **Document Cache**: Report on cache status and trending capabilities - Refer to the Python Data Visualization Guide (imported above) for complete examples, code patterns, and best practices. + Refer to the Charts with Trending Guide (imported above) for complete examples, trending patterns, cache-memory integration, and best practices. PROMPT_EOF - name: Append XPIA security instructions to prompt diff --git a/.github/workflows/python-data-charts.md b/.github/workflows/python-data-charts.md index 4f26109d1c6..5bb8a2f8987 100644 --- a/.github/workflows/python-data-charts.md +++ b/.github/workflows/python-data-charts.md @@ -11,7 +11,7 @@ tools: agentic-workflows: edit: imports: - - shared/python-dataviz.md + - shared/charts-with-trending.md safe-outputs: upload-assets: create-discussion: @@ -22,11 +22,11 @@ timeout-minutes: 15 # Python Data Visualization Generator -You are a data visualization expert specializing in Python-based chart generation using scientific computing libraries. +You are a data visualization expert specializing in Python-based chart generation using scientific computing libraries with trending analysis capabilities. 
## Mission -Generate high-quality data visualizations with random sample data, upload charts as assets, and create a discussion with embedded images. +Generate high-quality data visualizations with sample data, track trending metrics using cache-memory, upload charts as assets, and create a discussion with embedded images. ## Current Context @@ -40,58 +40,77 @@ The Python data visualization environment has been set up with: - **Working Directory**: `/tmp/gh-aw/python/` - **Data Directory**: `/tmp/gh-aw/python/data/` - **Charts Directory**: `/tmp/gh-aw/python/charts/` -- **Cache Memory**: `/tmp/gh-aw/cache-memory/` +- **Cache Memory**: `/tmp/gh-aw/cache-memory/` (for trending data persistence) -See the shared Python Data Visualization Guide (imported above) for detailed usage instructions, best practices, and examples. +See the Charts with Trending Guide (imported above) for detailed usage instructions, best practices, trending patterns, and complete examples. ## Task Overview -### Phase 1: Generate Sample Data +### Phase 1: Check Cache for Historical Data -1. Generate random sample data using NumPy with interesting patterns (e.g., trends, distributions, correlations) -2. Save the data to `/tmp/gh-aw/python/data/` as CSV files -3. Document the data generation process +1. Check `/tmp/gh-aw/cache-memory/trending/` for existing trending data +2. Load any historical metrics to show trend progression +3. Document what historical data exists (if any) -### Phase 2: Create Visualizations +### Phase 2: Generate or Collect Sample Data -1. Create multiple chart types to showcase the data: - - Bar chart - - Line chart - - Scatter plot - - Distribution plot +1. Generate new sample data using NumPy with interesting patterns OR +2. Collect actual metrics from the repository using GitHub API +3. Save the data to `/tmp/gh-aw/python/data/` as CSV or JSON files +4. Document the data generation/collection process -2. 
Save all charts to `/tmp/gh-aw/python/charts/` with descriptive filenames +### Phase 3: Update Cache with New Data -3. Ensure high quality settings (DPI 300, clear labels, seaborn styling) +1. Append new data points to `/tmp/gh-aw/cache-memory/trending/<metric-name>/history.jsonl` +2. Use JSON Lines format (one JSON object per line) +3. Include timestamp, metric name, value, and metadata +4. Create the directory structure if it doesn't exist -### Phase 3: Upload Charts as Assets +### Phase 4: Create Trending Visualizations + +1. Create trend charts showing data over time (if historical data exists): + - Time-series line charts with multiple metrics + - Moving averages to show smoothed trends + - Comparative trend analysis + +2. Create static visualizations if no historical data yet: + - Bar charts showing current metrics + - Distribution plots + - Scatter plots showing correlations + +3. Save all charts to `/tmp/gh-aw/python/charts/` with descriptive filenames + +4. Ensure high quality settings (DPI 300, clear labels, seaborn styling) + +### Phase 5: Upload Charts as Assets 1. Upload each generated chart using the `upload asset` tool 2. Collect the returned URLs for each chart 3. The assets will be published to an orphaned git branch -### Phase 4: Create Discussion Report +### Phase 6: Create Discussion Report Create a discussion with the following structure, including the uploaded chart images: -**Title**: "📊 Data Visualization Report - Random Sample Data" +**Title**: "📊 Data Visualization Report - Trending Analysis" **Content**: ```markdown -# 📊 Data Visualization Report +# 📊 Data Visualization & Trending Report Generated on: [current date] ## Summary -This report contains data visualizations generated from randomly generated sample data using Python scientific computing libraries. +This report contains data visualizations and trending analysis generated using Python scientific computing libraries with persistent cache-memory for historical tracking. 
-## Generated Visualizations +## Trending Metrics -### Chart 1: [Chart Type] -![Chart 1 Description](URL_FROM_UPLOAD_ASSET) +![Trending Chart 1](URL_FROM_UPLOAD_ASSET) -[Brief description of what this chart shows] +[Analysis of trends shown: progression over time, moving averages, notable patterns] + +## Additional Visualizations ### Chart 2: [Chart Type] ![Chart 2 Description](URL_FROM_UPLOAD_ASSET) @@ -103,17 +122,20 @@ This report contains data visualizations generated from randomly generated sampl [Brief description of what this chart shows] -### Chart 4: [Chart Type] -![Chart 4 Description](URL_FROM_UPLOAD_ASSET) - -[Brief description of what this chart shows] - ## Data Information -- **Data Generation**: Random sample data using NumPy +- **Data Source**: [Random sample / GitHub API / Other] - **Sample Size**: [number of data points] - **Variables**: [list of variables/columns] - **Patterns**: [describe any patterns in the data] +- **Historical Data Points**: [count if trending data exists] +- **Tracking Period**: [date range if historical data exists] + +## Cache Memory Status + +- **Cache Location**: `/tmp/gh-aw/cache-memory/trending/` +- **Metrics Tracked**: [list of metrics being tracked] +- **Persistence**: Data persists across workflow runs via GitHub Actions cache ## Libraries Used @@ -132,14 +154,18 @@ This report contains data visualizations generated from randomly generated sampl --- *This report was automatically generated by the Python Data Visualization Generator workflow.* +*Historical trending data is stored in cache-memory for continuous analysis across runs.* ``` ## Key Reminders -- ✅ **Generate Random Data**: Use NumPy to create interesting sample data +- ✅ **Check Cache First**: Look for historical trending data in `/tmp/gh-aw/cache-memory/trending/` +- ✅ **Append to History**: Add new data points using JSON Lines format +- ✅ **Create Trends**: Generate trend charts if historical data exists - ✅ **Upload Charts**: Use the `upload asset` tool 
for each chart - ✅ **Embed Images**: Include uploaded chart URLs in the markdown discussion - ✅ **High Quality**: Use DPI 300, clear labels, and seaborn styling +- ✅ **Document Cache**: Report on cache status and trending capabilities -Refer to the Python Data Visualization Guide (imported above) for complete examples, code patterns, and best practices. +Refer to the Charts with Trending Guide (imported above) for complete examples, trending patterns, cache-memory integration, and best practices. diff --git a/.github/workflows/shared/charts-with-trending.md b/.github/workflows/shared/charts-with-trending.md new file mode 100644 index 00000000000..086329a82f0 --- /dev/null +++ b/.github/workflows/shared/charts-with-trending.md @@ -0,0 +1,419 @@ +--- +# Charts with Trending - Shared Agentic Workflow +# Provides complete setup for generating charts with trending analysis and cache-memory +# +# Usage: +# imports: +# - shared/charts-with-trending.md +# +# This import provides: +# - Python data visualization environment with scientific libraries +# - Trending analysis capabilities and best practices +# - Cache-memory integration for persistent trending data +# - Asset upload configuration for embedding charts in discussions/issues +# +# Key Features: +# - Automatic cache-memory for storing historical trending data +# - Python environment with NumPy, Pandas, Matplotlib, Seaborn, SciPy +# - Helper functions for loading/saving trending data +# - Best practices for creating impactful trend visualizations + +imports: + - shared/python-dataviz.md + - shared/trends.md + +tools: + cache-memory: + key: charts-trending-${{ github.workflow }}-${{ github.run_id }} +--- + +# Charts with Trending - Complete Guide + +This shared workflow provides everything you need to create compelling trend visualizations with persistent data storage. + +## Cache-Memory for Trending Data + +You have access to persistent cache-memory at `/tmp/gh-aw/cache-memory/` that survives across workflow runs. 
Use it to store historical trending data. + +### Trending Data Organization + +Organize your trending data in cache-memory: + +``` +/tmp/gh-aw/cache-memory/ +├── trending/ +│ ├── <metric-name>/ +│ │ ├── history.jsonl # Time-series data (JSON Lines format) +│ │ ├── metadata.json # Data schema and descriptions +│ │ └── last_updated.txt # Timestamp of last update +│ └── index.json # Index of all tracked metrics +``` + +### Helper Functions for Trending Data + +**Load Historical Data:** +```bash +# Check if historical data exists +if [ -f /tmp/gh-aw/cache-memory/trending/issues/history.jsonl ]; then + echo "Loading historical issue trending data..." + cp /tmp/gh-aw/cache-memory/trending/issues/history.jsonl /tmp/gh-aw/python/data/ +else + echo "No historical data found. Starting fresh." + mkdir -p /tmp/gh-aw/cache-memory/trending/issues +fi +``` + +**Append New Data:** +```python +import json +from datetime import datetime + +# New data point +data_point = { + "timestamp": datetime.now().isoformat(), + "metric": "issue_count", + "value": 42, + "metadata": {"source": "github_api"} +} + +# Append to history (JSON Lines format) +with open('/tmp/gh-aw/cache-memory/trending/issues/history.jsonl', 'a') as f: + f.write(json.dumps(data_point) + '\n') +``` + +**Load All Historical Data for Analysis:** +```python +import pandas as pd +import json +import os + +# Load all historical data +data_points = [] +history_file = '/tmp/gh-aw/cache-memory/trending/issues/history.jsonl' + +if os.path.exists(history_file): + with open(history_file, 'r') as f: + for line in f: + data_points.append(json.loads(line)) + + # Convert to DataFrame for analysis + df = pd.DataFrame(data_points) + df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.sort_values('timestamp') +else: + df = pd.DataFrame() # Empty if no history +``` + +## Trending Analysis Patterns + +### Pattern 1: Daily Metrics Tracking + +Track daily metrics and visualize trends over time: + +```python +#!/usr/bin/env python3 +""" +Daily metrics
trending example +""" +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import json +import os +from datetime import datetime + +# Set style +sns.set_style("whitegrid") +sns.set_palette("husl") + +# Load historical data +history_file = '/tmp/gh-aw/cache-memory/trending/daily_metrics/history.jsonl' +if os.path.exists(history_file): + data = pd.read_json(history_file, lines=True) + data['date'] = pd.to_datetime(data['timestamp']).dt.date +else: + data = pd.DataFrame() + +# Add today's data +today_data = { + "timestamp": datetime.now().isoformat(), + "issues_opened": 5, + "issues_closed": 3, + "prs_merged": 2 +} + +# Append to history +os.makedirs(os.path.dirname(history_file), exist_ok=True) +with open(history_file, 'a') as f: + f.write(json.dumps(today_data) + '\n') + +# Reload with today's data +data = pd.read_json(history_file, lines=True) +data['date'] = pd.to_datetime(data['timestamp']).dt.date +# Sum only numeric metric columns; the datetime 'timestamp' column cannot be summed +daily_stats = data.groupby('date').sum(numeric_only=True) + +# Create trend chart +fig, ax = plt.subplots(figsize=(12, 7), dpi=300) +daily_stats.plot(ax=ax, marker='o', linewidth=2) +ax.set_title('Daily Metrics Trends', fontsize=16, fontweight='bold') +ax.set_xlabel('Date', fontsize=12) +ax.set_ylabel('Count', fontsize=12) +ax.legend(loc='best') +ax.grid(True, alpha=0.3) +plt.xticks(rotation=45) +plt.tight_layout() + +plt.savefig('/tmp/gh-aw/python/charts/daily_metrics_trend.png', + dpi=300, bbox_inches='tight', facecolor='white') + +print(f"Chart saved.
Total data points: {len(data)}") +``` + +### Pattern 2: Moving Averages and Smoothing + +```python +# Calculate 7-day moving average +df['rolling_avg'] = df['value'].rolling(window=7, min_periods=1).mean() + +# Plot with trend line +fig, ax = plt.subplots(figsize=(12, 7), dpi=300) +ax.plot(df['date'], df['value'], label='Actual', alpha=0.5, marker='o') +ax.plot(df['date'], df['rolling_avg'], label='7-day Average', linewidth=2.5) +ax.fill_between(df['date'], df['value'], df['rolling_avg'], alpha=0.2) +``` + +### Pattern 3: Comparative Trends + +```python +# Compare multiple metrics over time +fig, ax = plt.subplots(figsize=(14, 8), dpi=300) + +for metric in ['metric_a', 'metric_b', 'metric_c']: + metric_data = df[df['metric'] == metric] + ax.plot(metric_data['timestamp'], metric_data['value'], + marker='o', label=metric, linewidth=2) + +ax.set_title('Comparative Metrics Trends', fontsize=16, fontweight='bold') +ax.legend(loc='best', fontsize=12) +ax.grid(True, alpha=0.3) +plt.xticks(rotation=45) +``` + +## Best Practices for Cache-Memory Trending + +### 1. Use JSON Lines Format + +JSON Lines (`.jsonl`) is ideal for append-only trending data: +- One JSON object per line +- Easy to append new data +- Efficient for time-series data +- Simple to load with pandas: `pd.read_json(file, lines=True)` + +### 2. Include Metadata + +Store metadata alongside data: +```json +{ + "metric_name": "issue_resolution_time", + "unit": "hours", + "description": "Average time to close issues", + "started_tracking": "2024-01-01", + "updated": "2024-03-15" +} +``` + +### 3. Maintain Index + +Keep an index of all tracked metrics: +```json +{ + "metrics": [ + "issue_count", + "pr_count", + "commit_count", + "test_coverage" + ], + "last_updated": "2024-03-15T10:30:00Z" +} +``` + +### 4. 
Data Retention Strategy + +Implement retention policies to prevent unbounded growth: +```python +# Keep only last 90 days +cutoff_date = datetime.now() - timedelta(days=90) +df = df[df['timestamp'] >= cutoff_date] + +# Save pruned data (keep ISO 8601 timestamps so appended and pruned rows share one format) +df.to_json('/tmp/gh-aw/cache-memory/trending/issues/history.jsonl', + orient='records', lines=True, date_format='iso') +``` + +## Complete Trending Workflow Example + +```python +#!/usr/bin/env python3 +""" +Complete trending analysis workflow +Collects data, updates history, generates trend charts +""" +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import json +import os +from datetime import datetime, timedelta + +# Configuration +CACHE_DIR = '/tmp/gh-aw/cache-memory/trending' +METRIC_NAME = 'github_activity' +HISTORY_FILE = f'{CACHE_DIR}/{METRIC_NAME}/history.jsonl' +CHARTS_DIR = '/tmp/gh-aw/python/charts' + +# Ensure directories exist +os.makedirs(f'{CACHE_DIR}/{METRIC_NAME}', exist_ok=True) +os.makedirs(CHARTS_DIR, exist_ok=True) + +# Collect today's data (example) +today_data = { + "timestamp": datetime.now().isoformat(), + "issues_opened": 8, + "prs_merged": 12, + "commits": 45, + "contributors": 6 +} + +# Append to history +with open(HISTORY_FILE, 'a') as f: + f.write(json.dumps(today_data) + '\n') + +# Load all historical data +df = pd.read_json(HISTORY_FILE, lines=True) +df['date'] = pd.to_datetime(df['timestamp']).dt.date +df = df.sort_values('timestamp') + +# Aggregate by date (numeric metric columns only; the datetime 'timestamp' column cannot be summed) +daily_stats = df.groupby('date').sum(numeric_only=True) + +# Generate trend chart +sns.set_style("whitegrid") +sns.set_palette("husl") + +fig, axes = plt.subplots(2, 2, figsize=(16, 12), dpi=300) +fig.suptitle('GitHub Activity Trends', fontsize=18, fontweight='bold') + +# Chart 1: Issues Opened +axes[0, 0].plot(daily_stats.index, daily_stats['issues_opened'], + marker='o', linewidth=2, color='#FF6B6B') +axes[0, 0].set_title('Issues Opened', fontsize=14) +axes[0, 0].grid(True, alpha=0.3) + +# Chart 2: PRs Merged +axes[0, 1].plot(daily_stats.index,
daily_stats['prs_merged'], + marker='s', linewidth=2, color='#4ECDC4') +axes[0, 1].set_title('PRs Merged', fontsize=14) +axes[0, 1].grid(True, alpha=0.3) + +# Chart 3: Commits +axes[1, 0].plot(daily_stats.index, daily_stats['commits'], + marker='^', linewidth=2, color='#45B7D1') +axes[1, 0].set_title('Commits', fontsize=14) +axes[1, 0].grid(True, alpha=0.3) + +# Chart 4: Contributors +axes[1, 1].plot(daily_stats.index, daily_stats['contributors'], + marker='D', linewidth=2, color='#FFA07A') +axes[1, 1].set_title('Active Contributors', fontsize=14) +axes[1, 1].grid(True, alpha=0.3) + +plt.tight_layout() +plt.savefig(f'{CHARTS_DIR}/activity_trends.png', + dpi=300, bbox_inches='tight', facecolor='white') + +print(f"✅ Trend chart generated with {len(df)} data points") +print(f"📊 Chart saved to: {CHARTS_DIR}/activity_trends.png") +print(f"💾 Historical data: {HISTORY_FILE}") +``` + +## Integration with Asset Upload and Discussions + +After generating charts, use the safe-outputs tools to share them: + +```markdown +## Example Discussion with Trending Charts + +Upload each chart using the `upload asset` tool, then create a discussion: + +**Title**: "📈 Weekly Trending Analysis - [Date]" + +**Content**: +# 📈 Trending Analysis Report + +Generated on: {date} + +## Activity Trends + +![Activity Trends](URL_FROM_UPLOAD_ASSET) + +Analysis shows: +- Issues opened: Up 15% from last week +- PR velocity: Stable at 12 PRs/day +- Commit activity: Peak on Tuesdays and Wednesdays +- Active contributors: Growing trend (+20% this month) + +## Data Summary + +- **Total data points**: {count} +- **Date range**: {start} to {end} +- **Tracking period**: {days} days + +--- + +*Generated using Charts with Trending shared workflow* +*Historical data stored in cache-memory for continuous tracking* +``` + +## Tips for Success + +1. **Consistency**: Use same metric names across runs +2. **Timestamps**: Always include ISO 8601 timestamps +3. **Validation**: Check data quality before appending +4. 
**Backup**: Keep metadata for data recovery +5. **Documentation**: Comment your data schemas +6. **Testing**: Validate charts before uploading +7. **Cleanup**: Implement retention policies +8. **Indexing**: Maintain metric index for discovery + +## Common Use Cases + +### Repository Activity Trends +```python +# Track: commits, PRs, issues, contributors +# Frequency: Daily +# Retention: 90 days +``` + +### Performance Metrics Trends +```python +# Track: build time, test coverage, bundle size +# Frequency: Per commit/PR +# Retention: 180 days +``` + +### Quality Metrics Trends +```python +# Track: code complexity, test failures, security alerts +# Frequency: Weekly +# Retention: 1 year +``` + +### Workflow Efficiency Trends +```python +# Track: workflow duration, token usage, success rate +# Frequency: Per run +# Retention: 30 days +``` + +--- + +Remember: The power of trending comes from consistent data collection over time. Use cache-memory to build a rich historical dataset that reveals insights and patterns!