nflplotpy/examples/validate_player_ids.py at main · jbf302/nflplotpy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env python3
"""
Player ID Validation Script

This script validates the accuracy of player ID lookups and headshot matching
to ensure we're displaying the correct player photos. It prints its findings
to stdout and saves a headshot grid to ``examples/headshot_validation_grid.png``.

Requirements:
- nflplotpy (provides ``nflplotpy.core.urls``)
- pandas, requests, Pillow and matplotlib
- nfl_data_py (optional; the 2024 play-by-play validation is skipped without it)
- Network access for the headshot requests
"""

import warnings

# Silence every warning category for the rest of the process. The call sits
# before the third-party imports, so warnings raised while importing them are
# hidden as well (presumably deliberate, to keep the console report readable).
warnings.filterwarnings("ignore")

import pandas as pd
import requests
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt

# nfl_data_py is optional: NFL_DATA_AVAILABLE records whether the import
# succeeded so validate_2024_qb_ids() can skip itself instead of crashing.
try:
    import nfl_data_py as nfl

    NFL_DATA_AVAILABLE = True
except ImportError:
    NFL_DATA_AVAILABLE = False
    print("nfl_data_py not available. Please install for full validation.")

# Lookup helpers under validation.
from nflplotpy.core.urls import (
    get_player_info_by_id,
    get_player_headshot_urls,
    discover_player_id,
)


def validate_known_players():
    """Check ID lookups and headshot reachability for well-known players.

    For each hard-coded player this runs a name lookup
    (``discover_player_id``), a GSIS-ID lookup and an ESPN-ID lookup
    (``get_player_info_by_id``), then sends an HTTP HEAD request to the
    ``espn_full`` headshot URL. Progress is printed as it goes.

    Returns:
        pandas.DataFrame: one row per player with the columns ``name``,
        ``expected_gsis``, ``expected_espn``, ``name_gsis_match``,
        ``name_espn_match``, ``gsis_lookup_success``, ``espn_lookup_success``
        and ``headshot_accessible`` (the last five are booleans).
    """

    print("🔍 VALIDATING KNOWN PLAYERS")
    print("=" * 40)

    # Known player test cases (name, expected GSIS ID, expected ESPN ID)
    known_players = [
        ("Patrick Mahomes", "00-0033873", "3139477"),
        ("Josh Allen", "00-0034857", "3918298"),  # Note: Was showing wrong ID before
        ("Tom Brady", "00-0019596", "2330"),
        ("Aaron Rodgers", "00-0023459", "8439"),
        # NOTE(review): the previous value "00-0031280" appears to be Derek
        # Carr's GSIS ID; confirm this one against the nflverse players table.
        ("Lamar Jackson", "00-0034796", "3916387"),
    ]

    validation_results = []

    for name, expected_gsis, expected_espn in known_players:
        print(f"\n🏈 Testing: {name}")

        # `or {}` is defensive: a lookup that returns None is treated as
        # "nothing found" instead of raising AttributeError on .get().
        name_result = discover_player_id(name) or {}
        print(
            f"   Name lookup - GSIS: {name_result.get('gsis_id')}, ESPN: {name_result.get('espn_id')}"
        )

        gsis_result = get_player_info_by_id(expected_gsis, id_type="gsis") or {}
        print(
            f"   GSIS lookup - Name: {gsis_result.get('name')}, ESPN: {gsis_result.get('espn_id')}"
        )

        espn_result = get_player_info_by_id(expected_espn, id_type="espn") or {}
        print(
            f"   ESPN lookup - Name: {espn_result.get('name')}, GSIS: {espn_result.get('gsis_id')}"
        )

        # Validate headshot URL accessibility
        headshot_urls = get_player_headshot_urls(expected_espn, id_type="espn") or {}
        headshot_url = headshot_urls.get("espn_full")
        headshot_accessible = False
        if not headshot_url:
            print("   Headshot URL: ❌ No ESPN headshot URL returned")
        else:
            try:
                # requests.head() does not follow redirects unless asked to,
                # so a redirected image would otherwise look inaccessible.
                response = requests.head(
                    headshot_url, timeout=5, allow_redirects=True
                )
            except requests.RequestException:
                print("   Headshot URL: ❌ Error checking accessibility")
            else:
                headshot_accessible = response.status_code == 200
                print(
                    f"   Headshot URL: {'✅ Accessible' if headshot_accessible else '❌ Not accessible'}"
                )

        # Record validation result
        validation_results.append(
            {
                "name": name,
                "expected_gsis": expected_gsis,
                "expected_espn": expected_espn,
                "name_gsis_match": name_result.get("gsis_id") == expected_gsis,
                "name_espn_match": name_result.get("espn_id") == expected_espn,
                "gsis_lookup_success": gsis_result.get("espn_id") == expected_espn,
                "espn_lookup_success": espn_result.get("gsis_id") == expected_gsis,
                "headshot_accessible": headshot_accessible,
            }
        )

    return pd.DataFrame(validation_results)


def _pbp_name_matches(pbp_name, full_name):
    """Return True when a play-by-play name is consistent with a full name.

    Accepts either a plain substring match or an abbreviated
    ``"<initials>.<surname>"`` form (e.g. ``"P.Mahomes"`` against
    ``"Patrick Mahomes"``). Non-string inputs (None, NaN) never match.
    """
    if not isinstance(pbp_name, str) or not isinstance(full_name, str):
        return False

    short = pbp_name.strip().lower()
    full = full_name.strip().lower()
    if not short or not full:
        return False
    if short in full:
        return True

    # Abbreviated form: text before the first dot is the start of the first
    # name, the remainder is the surname.
    initials, dot, surname = short.partition(".")
    surname = surname.strip()
    if not dot or not initials or not surname:
        return False

    first, _, rest = full.partition(" ")
    # Surname must start on a word boundary after the first name, which also
    # tolerates middle names and suffixes such as "Jr." or "II".
    return first.startswith(initials) and f" {surname}" in f" {rest}"


def validate_2024_qb_ids():
    """Validate player IDs from actual 2024 play-by-play data.

    Loads 2024 play-by-play data through ``nfl_data_py``, takes the ten
    passers with the most regular-season rows, resolves each GSIS ID with
    ``get_player_info_by_id`` and checks the ESPN headshot URL with an HTTP
    HEAD request. Progress is printed as it goes.

    Returns:
        pandas.DataFrame: one row per passer with the columns ``pbp_name``,
        ``gsis_id``, ``espn_id``, ``validated_name``, ``attempts``,
        ``name_match``, ``headshot_accessible`` and ``headshot_url``. An empty
        DataFrame is returned when ``nfl_data_py`` is unavailable or any step
        raises.
    """

    print("\n🏈 VALIDATING 2024 QB DATA")
    print("=" * 40)

    if not NFL_DATA_AVAILABLE:
        print("⚠️  nfl_data_py not available - skipping real data validation")
        return pd.DataFrame()

    try:
        # Load 2024 play-by-play data
        print("Loading 2024 play-by-play data...")
        pbp_data = nfl.import_pbp_data([2024])

        # Regular-season rows that identify a passer
        qb_data = pbp_data[
            (pbp_data["passer_player_name"].notna())
            & (pbp_data["passer_player_id"].notna())
            & (pbp_data["season_type"] == "REG")
        ]

        # Top QBs by number of such rows
        top_qbs = (
            qb_data.groupby(["passer_player_id", "passer_player_name"])
            .size()
            .nlargest(10)
        )

        validation_results = []

        print("\nValidating top 10 QBs by attempts...")

        for (player_id, player_name), attempts in top_qbs.items():
            print(f"\n📊 {player_name} (GSIS: {player_id}, Attempts: {attempts})")

            # Lookup player info using GSIS ID; a None result counts as empty.
            player_info = get_player_info_by_id(player_id, id_type="gsis") or {}
            espn_id = player_info.get("espn_id")

            # Start from the "nothing found" row and fill in what we learn.
            row = {
                "pbp_name": player_name,
                "gsis_id": player_id,
                "espn_id": None,
                "validated_name": None,
                "attempts": attempts,
                "name_match": False,
                "headshot_accessible": False,
                "headshot_url": "",
            }

            # NaN is truthy, so check for it explicitly before trusting the ID.
            if espn_id is None or pd.isna(espn_id) or not espn_id:
                print("   ❌ No ESPN ID found")
                validation_results.append(row)
                continue

            validated_name = player_info.get("name")
            print(f"   ✅ Found ESPN ID: {espn_id}")
            print(f"   ✅ Validated name: {validated_name}")

            # Test headshot URL
            headshot_urls = get_player_headshot_urls(espn_id, id_type="espn") or {}
            headshot_url = headshot_urls.get("espn_full") or ""
            headshot_accessible = False

            if headshot_url:
                try:
                    # HEAD requests do not follow redirects unless asked to.
                    response = requests.head(
                        headshot_url, timeout=5, allow_redirects=True
                    )
                except requests.RequestException as e:
                    print(f"   ❌ Headshot error: {e}")
                else:
                    headshot_accessible = response.status_code == 200
                    print(
                        f"   {'✅' if headshot_accessible else '❌'} Headshot: {headshot_url}"
                    )

            row.update(
                {
                    "espn_id": espn_id,
                    "validated_name": validated_name,
                    # Play-by-play names are typically abbreviated
                    # ("P.Mahomes"), so a plain substring test is not enough.
                    "name_match": _pbp_name_matches(player_name, validated_name),
                    "headshot_accessible": headshot_accessible,
                    "headshot_url": headshot_url,
                }
            )
            validation_results.append(row)

        return pd.DataFrame(validation_results)

    except Exception as e:
        # Best-effort boundary: a failed download or lookup must not abort the
        # rest of the script, so report it and return no rows.
        print(f"❌ Error validating 2024 QB data: {e}")
        return pd.DataFrame()


def create_validation_report(known_results, qb_results):
    """Print a summary of the known-player and 2024 QB validation results.

    Args:
        known_results: DataFrame with the columns ``name``,
            ``name_gsis_match``, ``name_espn_match``, ``gsis_lookup_success``,
            ``espn_lookup_success`` and ``headshot_accessible``. May be empty.
        qb_results: DataFrame with the columns ``pbp_name``, ``gsis_id``,
            ``espn_id``, ``validated_name``, ``name_match`` and
            ``headshot_accessible``. May be empty.

    Returns:
        None. The report is written to stdout. A section is skipped when its
        DataFrame is empty.
    """

    def _as_flags(series):
        # Only a literal True counts as a pass; None, NaN and "" count as
        # failures, so object-dtype columns are safe to sum and negate.
        return series.eq(True)

    print("\n📊 VALIDATION REPORT")
    print("=" * 50)

    if not known_results.empty:
        known_checks = [
            ("Name→GSIS matches", "name_gsis_match"),
            ("Name→ESPN matches", "name_espn_match"),
            ("GSIS lookup success", "gsis_lookup_success"),
            ("ESPN lookup success", "espn_lookup_success"),
            ("Headshots accessible", "headshot_accessible"),
        ]
        total_known = len(known_results)
        known_flags = pd.DataFrame(
            {column: _as_flags(known_results[column]) for _, column in known_checks}
        )

        print("\n🔍 Known Players Validation:")
        for label, column in known_checks:
            print(f"   ✅ {label}: {known_flags[column].sum()}/{total_known}")

        # A player is listed when ANY check failed, not just the two
        # name-based lookups, so the list agrees with the counts above.
        failed_rows = (~known_flags.all(axis=1)).to_numpy()
        if failed_rows.any():
            print("\n❌ Failed validations:")
            failed_names = known_results["name"].to_numpy()[failed_rows]
            failed_flags = known_flags.to_numpy()[failed_rows]
            for player_name, row_flags in zip(failed_names, failed_flags):
                failed_checks = [
                    column
                    for (_, column), passed in zip(known_checks, row_flags)
                    if not passed
                ]
                print(f"   - {player_name}: failed {', '.join(failed_checks)}")

    if not qb_results.empty:
        total_qbs = len(qb_results)
        has_espn = qb_results["espn_id"].notna()
        name_ok = _as_flags(qb_results["name_match"])
        headshot_ok = _as_flags(qb_results["headshot_accessible"])

        print("\n🏈 2024 QB Data Validation:")
        print(f"   ✅ Players with ESPN IDs: {has_espn.sum()}/{total_qbs}")
        print(f"   ✅ Name matches: {name_ok.sum()}/{total_qbs}")
        print(f"   ✅ Accessible headshots: {headshot_ok.sum()}/{total_qbs}")

        # Show players without ESPN IDs
        missing_espn = qb_results[~has_espn]
        if not missing_espn.empty:
            print("\n⚠️  Players missing ESPN IDs:")
            for _, row in missing_espn.iterrows():
                print(f"   - {row['pbp_name']} (GSIS: {row['gsis_id']})")

        # Show inaccessible headshots (only where an ESPN ID exists, since
        # players without one are already listed above)
        bad_headshots = qb_results[~headshot_ok & has_espn]
        if not bad_headshots.empty:
            print("\n⚠️  Players with inaccessible headshots:")
            for _, row in bad_headshots.iterrows():
                print(f"   - {row['validated_name']} (ESPN: {row['espn_id']})")


def test_headshot_visual_validation():
    """Create a visual grid of headshots for manual validation.

    Downloads the ``espn_full`` headshot for four hard-coded players, draws
    them in a 2x2 matplotlib grid and saves the figure to
    ``examples/headshot_validation_grid.png`` (relative to the current working
    directory). A panel whose image cannot be fetched or decoded shows an
    explanatory message instead, so one bad player never aborts the grid.

    NOTE(review): the ``test_`` prefix means pytest will collect this function
    if the file is ever picked up as a test module; the name is kept because
    callers depend on it.
    """
    from pathlib import Path

    print("\n🖼️  CREATING VISUAL VALIDATION GRID")
    print("=" * 40)

    # Test a few known players (name, ESPN ID)
    test_players = [
        ("Patrick Mahomes", "3139477"),
        ("Josh Allen", "3918298"),
        ("Tom Brady", "2330"),
        ("Aaron Rodgers", "8439"),
    ]
    output_path = Path("examples/headshot_validation_grid.png")

    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    try:
        for ax, (name, espn_id) in zip(axes.flatten(), test_players):
            try:
                urls = get_player_headshot_urls(espn_id, id_type="espn")
                if urls.get("espn_full"):
                    # Download and display image
                    response = requests.get(urls["espn_full"], timeout=10)
                    response.raise_for_status()

                    img = Image.open(BytesIO(response.content))
                    ax.imshow(img)
                else:
                    ax.text(
                        0.5,
                        0.5,
                        f"No headshot\navailable\nfor {name}",
                        ha="center",
                        va="center",
                        transform=ax.transAxes,
                    )
                ax.set_title(f"{name}\nESPN ID: {espn_id}", fontsize=12)
            except Exception as e:
                # Per-panel best effort: network, HTTP and image-decoding
                # errors are all shown in the panel rather than raised.
                ax.text(
                    0.5,
                    0.5,
                    f"Error loading\n{name}\n{str(e)[:50]}...",
                    ha="center",
                    va="center",
                    transform=ax.transAxes,
                )
                ax.set_title(f"{name} - ERROR", fontsize=12)
            ax.axis("off")

        fig.tight_layout()
        # savefig does not create missing directories, so make sure the
        # target folder exists when the script is run from elsewhere.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        print(f"✅ Saved visual validation grid to: {output_path.as_posix()}")
    finally:
        # Close this specific figure even when drawing or saving fails.
        plt.close(fig)


def main():
    """Run every validation step in order and print the closing summary."""

    intro_lines = (
        "🔧 PLAYER ID & HEADSHOT VALIDATION",
        "=" * 50,
        "This script validates player ID lookups and headshot accuracy",
        "to ensure we're matching the correct players to their photos.\n",
    )
    for line in intro_lines:
        print(line)

    # Known-player checks run first, then the 2024 QB checks; both result
    # frames feed straight into the printed report.
    create_validation_report(validate_known_players(), validate_2024_qb_ids())

    # Headshot grid for eyeballing that photos match the players.
    test_headshot_visual_validation()

    outro_lines = (
        "\n🎉 VALIDATION COMPLETE",
        "Review the results above and check the visual validation grid.",
        "Any failures indicate areas that need improvement in ID matching.",
    )
    for line in outro_lines:
        print(line)

# Run the validation suite only when this file is executed as a script;
# importing it as a module defines the functions without running them.
if __name__ == "__main__":
    main()