{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "raKucNuxIcQd" }, "outputs": [], "source": [ "forward_statistics =['value', 'was_home', 'last_season_position', 'percent_value',\n", " 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n", " 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n", " 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n", " 'mean assists 3','mean bonus 3', 'mean bps 3','mean creativity 3', 'mean goals_scored 3',\n", " 'mean ict_index 3', 'mean influence 3', 'mean minutes 3', 'mean penalties_missed 3', 'mean threat 3',\n", " 'mean total_points 3','mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n", " 'std ict_index 3', 'std influence 3', 'std minutes 3',\n", " 'std threat 3', 'std total_points 3', 'std value 3']\n", "\n", "leak_columns = [\n", " \"name\",\n", " \"team\",\n", "] # columns that shouldnt be used in training fir fear of data leakage\n", "\n", "\n", "dropped_columns = [\n", " \"season\",\n", " \"opponent\",\n", " \"match_result\",\n", " # \"position\",\n", " \"assists\",\n", " \"penalties_missed\",\n", " \"bonus\",\n", " \"bps\",\n", " \"clean_sheets\",\n", " \"creativity\",\n", " \"goals_conceded\",\n", " \"goals_scored\",\n", " \"ict_index\",\n", " \"influence\",\n", " \"own_goals\",\n", " \"penalties_saved\",\n", " \"red_cards\",\n", " \"saves\",\n", " \"selected\",\n", " \"threat\",\n", " \"transfers_balance\",\n", " \"transfers_in\",\n", " \"transfers_out\",\n", " \"yellow_cards\",\n", " \"team Goal scored\",\n", " \"team Goal conceded\"\n", "] # \"value\",\n", "\n", "midfielder_statistics =['value', 'was_home', 'last_season_position', 'percent_value',\n", " 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n", " 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n", " 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n", " 'mean assists 3','mean bonus 3', 'mean bps 3','mean creativity 3', 'mean goals_scored 3',\n", " 'mean ict_index 3', 'mean influence 3', 'mean minutes 3', 'mean penalties_missed 3', 'mean threat 3',\n", " 'mean total_points 3','mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n", " 'std ict_index 3', 'std influence 3', 'std minutes 3',\n", " 'std threat 3', 'std total_points 3', 'std value 3']\n", "\n", "goalkeeper_statistics = ['value', 'was_home', 'last_season_position', 'percent_value',\n", " 'position rank', 'total_points_ex', 'minutes_ex', 'goals_conceded_ex',\n", " 'bonus_ex', 'bps_ex', 'ict_index_ex', 'clean_sheets_ex',\n", " 'red_cards_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n", " 'mean bonus 3', 'mean bps 3', 'mean clean_sheets 3', 'mean goals_conceded 3',\n", " 'mean ict_index 3', 'mean minutes 3',\n", " 'mean own_goals 3', 'mean penalties_saved 3',\n", " 'mean saves 3', 'mean threat 3',\n", " 'mean total_points 3',\n", " 'mean value 3', 'mean match_result 3', 'std bps 3',\n", " 'std ict_index 3', 'std influence 3', 'std minutes 3',\n", " 'std threat 3', 'std total_points 3', 'std value 3']\n", "\n", "statistics =['value', 'position','was_home', 'last_season_position', 'percent_value',\n", " 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n", " 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n", " 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'clean_sheets_ex',\n", " 'yellow_cards_ex','now_cost_ex', 'GW', 'opponent_last_season_position', 'mean assists 3',\n", " 'mean bonus 3', 'mean bps 3', 'mean clean_sheets 3',\n", " 'mean creativity 3', 'mean goals_conceded 3', 'mean goals_scored 3',\n", " 'mean ict_index 3', 'mean influence 3', 'mean minutes 3',\n", " 'mean own_goals 3',\n", " 'mean red_cards 3', 'mean threat 3','mean total_points 3',\n", " 'mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n", " 'std ict_index 3', 'std influence 3', 'std minutes 3',\n", " 'std threat 3', 'std total_points 3', 'std value 3','mean saves 3','mean assists all',\n", " 'mean bonus all', 'mean bps all', 'mean clean_sheets all',\n", " 'mean creativity all', 'mean goals_conceded all', 'mean goals_scored all',\n", " 'mean ict_index all', 'mean influence all', 'mean minutes all',\n", " 'mean own_goals all',\n", " 'mean red_cards all', 'mean threat all','mean total_points all',\n", " 'mean value all', 'mean match_result all',\n", " 'mean team Goal scored 3','mean team Goal scored all','mean team Goal conceded 3','mean team Goal conceded all',\"ratio_goal_scored all\",\"ratio_goal_scored 3\",\n", " 'opp mean team Goal scored 3','opp mean team Goal conceded 3','opp mean team Goal scored all','opp mean team Goal conceded all',\"opp mean match_result all\"]\n", "\n", "\n", "date_cols=[\"day_of week\",\"month\",\"hour\",\"week\"]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "n42gWU8iME6S" }, "outputs": [], "source": [ "def convert_minutes(val):\n", " \"\"\"CONVERTS MINUTES TO A CATEGORICAL OUTPUT\"\"\"\n", " if val > 10:\n", " return 1\n", " else:\n", " return 0" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_NfkZfXAnuRr", "outputId": "96fd14fb-21c0-4318-a883-589d7915a124" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting catboost\n", " Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 MB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from catboost) (0.20.1)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from catboost) (3.7.1)\n", "Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from catboost) (1.23.5)\n", "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.10/dist-packages (from catboost) (1.5.3)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from catboost) (1.11.3)\n", "Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (from catboost) (5.15.0)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from catboost) (1.16.0)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->catboost) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->catboost) (2023.3.post1)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (1.1.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (4.43.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (1.4.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (23.2)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (9.4.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (3.1.1)\n", "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly->catboost) (8.2.3)\n", "Installing collected packages: catboost\n", "Successfully installed catboost-1.2.2\n" ] } ], "source": [ "pip install catboost" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "4fUTYkPMKCnU" }, "outputs": [], "source": [ "from hyperopt import tpe,hp,fmin,STATUS_OK,Trials\n", "from hyperopt.pyll.base import scope" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "_x6NGUGwIo3O" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import warnings\n", "import os\n", "\n", "\n", "warnings.filterwarnings(\"ignore\")\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.ensemble import (\n", " RandomForestClassifier,\n", " RandomForestRegressor,\n", " GradientBoostingRegressor,\n", ")\n", "from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n", "from sklearn.metrics import (\n", " mean_squared_error,\n", " mean_absolute_error,\n", " confusion_matrix,\n", " accuracy_score,\n", " f1_score,\n", ")\n", "from lightgbm import LGBMRegressor, LGBMClassifier\n", "from catboost import CatBoostClassifier, CatBoostRegressor\n", "from sklearn.model_selection import StratifiedKFold,KFold" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "NLHG8cabKXAo" }, "outputs": [], "source": [ "gameweek=10" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "KAEVPF2eKiR2" }, "outputs": [], "source": [ "train = pd.read_csv(\"/content/drive/MyDrive/Fplpredict /cleaned_dataset/cleaned_previous_seasons.csv\",index_col=0)#/content/drive/MyDrive/Fplpredict /cleaned_previous_seasons.csv\", index_col=0)\n", "old_gameweek_cleaned = []\n", "for i in range(1, gameweek):\n", " old_gameweek_cleaned.append(pd.read_csv(f\"/content/drive/MyDrive/Fplpredict /cleaned_dataset/2023-24/GW{i}.csv\"))\n", "old_gameweeks = pd.concat(old_gameweek_cleaned)[train.columns]\n", "train = pd.concat([train, old_gameweeks])\n", "\n", "# data for current gameweek we want to predict on\n", "test = pd.read_csv(f\"/content/drive/MyDrive/Fplpredict /cleaned_dataset/2023-24/GW{gameweek}.csv\", index_col=0)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ldHF-c7SKu67", "outputId": "f373d7e3-511c-4a51-a5c1-a0d38983d2e9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Folder /content/drive/MyDrive/Fplpredict /predicted_dataset/GW10 already exists\n" ] } ], "source": [ "#CREATE GAMEWEEK PREDICTIONS FOLDER\n", "path = f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}\"\n", "\n", "# create new single directory\n", "# check whether directory already exists\n", "if not os.path.exists(path):\n", " os.mkdir(path)\n", " print(\"Folder %s created!\" % path)\n", "else:\n", " print(\"Folder %s already exists\" % path)\n", "\n", "# data from current season but previous gameweeks" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lGLtePuRjy7Y", "outputId": "64fa7c9e-daa3-45c6-c5df-8947ba4706f5" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "MID 38456\n", "DEF 31804\n", "FWD 11640\n", "GK 9522\n", "GKP 761\n", "Name: position, dtype: int64" ] }, "metadata": {}, "execution_count": 9 } ], "source": [ "train[\"position\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "bsJ6_Ruej9Gl" }, "outputs": [], "source": [ "train[\"position\"]=train[\"position\"].replace({\"GKP\":\"GK\" })" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "FGI39XN-kYzz" }, "outputs": [], "source": [ "test = test[train.columns]\n", "train_copy = train.copy()\n", "test_copy = test.copy()\n", "\n", "train[\"index\"] = train[\"name\"] + train[\"kickoff_time\"].astype(\"str\")\n", "train.drop_duplicates(\"index\", keep=\"last\", inplace=True)\n", "train= train.set_index(\"index\")\n", "train[\"date\"]=pd.to_datetime(train[\"kickoff_time\"])\n", "train[\"day_of week\"]=train[\"date\"].dt.day_name\n", "train[\"month\"]=train[\"date\"].dt.month\n", "train[\"hour\"]=train[\"date\"].dt.hour\n", "train[\"week\"]=train[\"date\"].dt.week\n", "train.drop([\"kickoff_time\",\"date\"], axis=1, inplace=True)\n", "\n", "test[\"index\"] = test[\"name\"] + test[\"kickoff_time\"].astype(\"str\")\n", "test= test.set_index(\"index\")\n", "test[\"date\"]=pd.to_datetime(test[\"kickoff_time\"])\n", "test[\"day_of week\"]=test[\"date\"].dt.day_name\n", "test[\"month\"]=test[\"date\"].dt.month\n", "test[\"hour\"]=test[\"date\"].dt.hour\n", "test[\"week\"]=test[\"date\"].dt.week\n", "test.drop([\"kickoff_time\",\"date\"], axis=1, inplace=True)\n", "train[\"minutes\"] = train[\"minutes\"].apply(convert_minutes)\n", "\n", "target = train[[\"minutes\", \"GW\",\"position\" ]]\n", "train.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n", "test.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n", "train.drop(dropped_columns, axis=1, inplace=True)\n", "test.drop(dropped_columns, axis=1, inplace=True)\n", "\n", "\n", "# Convert categorical columns to numerical\n", "for col in train.columns:\n", " if train[col].dtype == \"object\":\n", " if col not in [\"team\", \"name\",\"position\"]:\n", " train[col] = pd.factorize(train[col])[0]\n", " test[col] = pd.factorize(test[col])[0]\n", "\n", "train[\"was_home\"] = train[\"was_home\"].replace({True: 0, False: 1})\n", "test[\"was_home\"] = test[\"was_home\"].replace({True: 0, False: 1})\n", "\n", "train = train[statistics + leak_columns+date_cols]\n", "test = test[statistics + leak_columns+date_cols]\n", "\n", "x, val, y, y_val = train_test_split(\n", " train.drop(leak_columns, axis=1),\n", " target[\"minutes\"],\n", " test_size=0.1,\n", " random_state=0,\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "Pc5EqQqLn8ZN" }, "outputs": [], "source": [ "params={'colsample_bylevel': 0.8070621518153563, 'learning_rate': 0.04765984972709895, 'max_depth': 7, 'reg_lambda': 5, 'scale_pos_weight': 2.5,'subsample': 0.6794390204583894}\n", "model=CatBoostClassifier(**params,cat_features=[\"position\"],random_state=0,early_stopping_rounds=500,use_best_model=True,verbose=500,n_estimators=10000)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ORnl_n6fnJut", "outputId": "781e1595-7059-4828-f514-76714abca6b9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0:\tlearn: 0.6548918\ttest: 0.6549632\tbest: 0.6549632 (0)\ttotal: 386ms\tremaining: 1h 4m 24s\n", "500:\tlearn: 0.3003844\ttest: 0.3266284\tbest: 0.3266046 (492)\ttotal: 1m 10s\tremaining: 22m 23s\n", "1000:\tlearn: 0.2741653\ttest: 0.3266822\tbest: 0.3261758 (797)\ttotal: 2m 14s\tremaining: 20m 6s\n", "Stopped by overfitting detector (500 iterations wait)\n", "\n", "bestTest = 0.3261757901\n", "bestIteration = 797\n", "\n", "Shrink model to first 798 iterations.\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "model.fit(x, y,eval_set=[(val,y_val)])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZD0zoDWto5j0", "outputId": "23b3cd81-a9fc-4512-bd9a-3f3c33c03489" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[3820 271]\n", " [1081 2992]]\n", "starting Accuracy score 0.8343949044585988\n", " starting f1 score: 0.8157033805888768\n" ] } ], "source": [ "print(confusion_matrix(model.predict(val), y_val))\n", "print(\n", " f\"starting Accuracy score {accuracy_score(model.predict(val), y_val)}\"\n", ")\n", "\n", "print(\n", " f\" starting f1 score: {f1_score(model.predict(val), y_val)}\"\n", ")\n", "\n", "feature_importance = pd.DataFrame(\n", " {\"column\": x.columns, \"imp\": model.feature_importances_}\n", ").sort_values(\n", " \"imp\", ascending=False\n", ") #" ] }, { "cell_type": "markdown", "metadata": { "id": "IaWuoInwvtp2" }, "source": [ "[[3863 267]\n", " [ 975 2862]]\n", "starting Accuracy score 0.8441069411321702\n", " starting f1 score: 0.8217054263565893" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "a0UEd4_cTO4r", "outputId": "d743f842-36d0-4c02-fd55-32d308bcf22d" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " column imp\n", "31 mean minutes 3 12.006206\n", "56 mean minutes all 4.429009\n", "42 std minutes 3 4.424185\n", "29 mean ict_index 3 3.527693\n", "20 GW 3.149277\n", "0 value 2.951257\n", "51 mean creativity all 2.918791\n", "55 mean influence all 2.890636\n", "4 percent_value 2.684590\n", "35 mean total_points 3 2.645315\n", "5 position rank 2.590643\n", "60 mean total_points all 2.347154\n", "61 mean value all 2.160690\n", "52 mean goals_conceded all 2.132516\n", "54 mean ict_index all 2.010548\n", "49 mean bps all 1.964515\n", "30 mean influence 3 1.890172\n", "3 last_season_position 1.801461\n", "1 position 1.758095\n", "66 mean team Goal conceded all 1.670793\n", "43 std threat 3 1.615126\n", "77 week 1.371841\n", "24 mean bps 3 1.345328\n", "64 mean team Goal scored all 1.318488\n", "59 mean threat all 1.240764\n", "73 opp mean match_result all 1.173817\n", "17 clean_sheets_ex 1.133980\n", "9 minutes_ex 1.048477\n", "50 mean clean_sheets all 1.018635\n", "75 month 1.003420\n", "72 opp mean team Goal conceded all 0.998519\n", "62 mean match_result all 0.974349\n", "71 opp mean team Goal scored all 0.945848\n", "65 mean team Goal conceded 3 0.930259\n", "11 creativity_ex 0.865671\n", "41 std influence 3 0.853334\n", "26 mean creativity 3 0.847830\n", "39 std creativity 3 0.837664\n", "63 mean team Goal scored 3 0.827717\n", "38 std bps 3 0.735068\n", "19 now_cost_ex 0.734875\n", "48 mean bonus all 0.719231\n", "8 total_points_ex 0.709128\n", "12 influence_ex 0.705810\n", "10 goals_conceded_ex 0.692582\n", "16 ict_index_ex 0.689631\n", "27 mean goals_conceded 3 0.689055\n", "37 mean match_result 3 0.680407\n", "36 mean value 3 0.648196\n", "45 std value 3 0.645090" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
columnimp
31mean minutes 312.006206
56mean minutes all4.429009
42std minutes 34.424185
29mean ict_index 33.527693
20GW3.149277
0value2.951257
51mean creativity all2.918791
55mean influence all2.890636
4percent_value2.684590
35mean total_points 32.645315
5position rank2.590643
60mean total_points all2.347154
61mean value all2.160690
52mean goals_conceded all2.132516
54mean ict_index all2.010548
49mean bps all1.964515
30mean influence 31.890172
3last_season_position1.801461
1position1.758095
66mean team Goal conceded all1.670793
43std threat 31.615126
77week1.371841
24mean bps 31.345328
64mean team Goal scored all1.318488
59mean threat all1.240764
73opp mean match_result all1.173817
17clean_sheets_ex1.133980
9minutes_ex1.048477
50mean clean_sheets all1.018635
75month1.003420
72opp mean team Goal conceded all0.998519
62mean match_result all0.974349
71opp mean team Goal scored all0.945848
65mean team Goal conceded 30.930259
11creativity_ex0.865671
41std influence 30.853334
26mean creativity 30.847830
39std creativity 30.837664
63mean team Goal scored 30.827717
38std bps 30.735068
19now_cost_ex0.734875
48mean bonus all0.719231
8total_points_ex0.709128
12influence_ex0.705810
10goals_conceded_ex0.692582
16ict_index_ex0.689631
27mean goals_conceded 30.689055
37mean match_result 30.680407
36mean value 30.648196
45std value 30.645090
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 15 } ], "source": [ "feature_importance.head(50)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 676 }, "id": "W5lDoiNUpJtN", "outputId": "ea973f25-021d-4cde-fdcc-f2330599e47e" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " column imp\n", "70 opp mean team Goal conceded 3 0.465798\n", "18 yellow_cards_ex 0.455093\n", "47 mean assists all 0.426588\n", "23 mean bonus 3 0.410647\n", "57 mean own_goals all 0.402117\n", "58 mean red_cards all 0.379497\n", "67 ratio_goal_scored all 0.376669\n", "32 mean own_goals 3 0.371388\n", "76 hour 0.363850\n", "6 goals_scored_ex 0.319510\n", "7 assists_ex 0.317864\n", "34 mean threat 3 0.285440\n", "2 was_home 0.284276\n", "69 opp mean team Goal scored 3 0.257607\n", "68 ratio_goal_scored 3 0.193383\n", "53 mean goals_scored all 0.166219\n", "46 mean saves 3 0.086837\n", "22 mean assists 3 0.051928\n", "28 mean goals_scored 3 0.012596\n", "74 day_of week 0.000000" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
columnimp
70opp mean team Goal conceded 30.465798
18yellow_cards_ex0.455093
47mean assists all0.426588
23mean bonus 30.410647
57mean own_goals all0.402117
58mean red_cards all0.379497
67ratio_goal_scored all0.376669
32mean own_goals 30.371388
76hour0.363850
6goals_scored_ex0.319510
7assists_ex0.317864
34mean threat 30.285440
2was_home0.284276
69opp mean team Goal scored 30.257607
68ratio_goal_scored 30.193383
53mean goals_scored all0.166219
46mean saves 30.086837
22mean assists 30.051928
28mean goals_scored 30.012596
74day_of week0.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 16 } ], "source": [ "feature_importance.tail(20)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_LNgj0zfBh5y", "outputId": "5a0e03e6-dfdd-4b5a-f895-4938ef2200af" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "index\n", "Oriol Romeu Vidal2023-01-14 15:00:00+00:00 0\n", "Ellery Balcombe2022-01-11 19:45:00+00:00 0\n", "Bukayo Saka2022-11-12 19:45:00+00:00 1\n", "Martin Dubravka2021-12-19 14:00:00+00:00 1\n", "Mark Gillespie2022-05-08 15:30:00+00:00 0\n", " ..\n", "Rodrigo Hernandez2022-05-08 15:30:00+00:00 1\n", "Romain Perraud2023-01-21 15:00:00+00:00 1\n", "Carlos Henrique Casimiro2022-11-13 16:30:00+00:00 1\n", "Kevin Long2021-03-06 12:30:00+00:00 0\n", "Cheikh Diaby2021-01-26 20:15:00+00:00 0\n", "Name: minutes, Length: 8164, dtype: int64" ] }, "metadata": {}, "execution_count": 17 } ], "source": [ "y_val" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "id": "mZrjfnMwpYAP" }, "outputs": [], "source": [ "test_copy[\"minutes\"] = model.predict(test.drop(leak_columns, axis=1))\n", "test_copy[leak_columns + [\"minutes\"]].to_csv(\n", " f\"minutes.csv\"\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "Crlq1n1JBd5b" }, "outputs": [], "source": [ "predicted_minutes=model.predict(val)\n", "val_=pd.DataFrame({\"ind\":val.index,\"actul_minutes\":y_val,\"predicted_minutes\":predicted_minutes,\"position\":val[\"position\"]})" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DBc8-KNlDbeG", "outputId": "e64dd990-35b7-4f28-b37c-d3ac024ee6ea" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "STARTING GOALKEEPERS PERFORMANCE!!!\n", "[[618 36]\n", " [ 18 241]]\n", "starting Accuracy score 0.940854326396495\n", " starting f1 score: 0.8992537313432836\n" ] } ], "source": [ "print(f\"STARTING GOALKEEPERS PERFORMANCE!!!\")\n", "a=val_[val_[\"position\"]==\"GK\"][\"actul_minutes\"]\n", "b=val_[val_[\"position\"]==\"GK\"][\"predicted_minutes\"]\n", "print(confusion_matrix(a,b))\n", "print(\n", " f\"starting Accuracy score {accuracy_score(a,b)}\"\n", ")\n", "\n", "print(\n", " f\" starting f1 score: {f1_score(a,b)}\"\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "z3zLvWZmC5Vh" }, "source": [ "STARTING GOALKEEPERS PERFORMANCE!!!\n", "[[583 21]\n", " [ 24 216]]\n", "starting Accuracy score 0.9466824644549763\n", " starting f1 score: 0.9056603773584907\n", "\n", "STARTING DEFENDERS PERFORMANCE!!!\n", "[[1449 208]\n", " [ 200 915]]\n", "starting Accuracy score 0.8528138528138528\n", " starting f1 score: 0.817694369973190\n", "\n", "STARTING MIDFIELDERS PERFORMANCE!!!\n", "[[1595 288]\n", " [ 235 1195]]\n", "starting Accuracy score 0.8421370359191066\n", " starting f1 score: 0.820460006865774\n", "\n", "STARTING FORWARDS PERFORMANCE!!!\n", "[[497 88]\n", " [ 69 325]]\n", "starting Accuracy score 0.839632277834525\n", " starting f1 score: 0.8054522924411399" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FXFtoCzvECJZ", "outputId": "d176ee4f-e3ff-4d82-f5c7-b2a6ffbb040c" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "STARTING DEFENDERS PERFORMANCE!!!\n", "[[1286 404]\n", " [ 121 992]]\n", "starting Accuracy score 0.8127006778451659\n", " starting f1 score: 0.7907532881626147\n" ] } ], "source": [ "print(f\"STARTING DEFENDERS PERFORMANCE!!!\")\n", "a=val_[val_[\"position\"]==\"DEF\"][\"actul_minutes\"]\n", "b=val_[val_[\"position\"]==\"DEF\"][\"predicted_minutes\"]\n", "print(confusion_matrix(a,b))\n", "print(\n", " f\"starting Accuracy score {accuracy_score(a,b)}\"\n", ")\n", "\n", "print(\n", " f\" starting f1 score: {f1_score(a,b)}\"\n", ")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lhgC0BAdD7xZ", "outputId": "f3de6e5e-74d7-4f7c-e22b-21be0d28af4f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "STARTING MIDFIELDERS PERFORMANCE!!!\n", "[[1443 499]\n", " [ 113 1389]]\n", "starting Accuracy score 0.8222996515679443\n", " starting f1 score: 0.8194690265486725\n" ] } ], "source": [ "print(f\"STARTING MIDFIELDERS PERFORMANCE!!!\")\n", "a=val_[val_[\"position\"]==\"MID\"][\"actul_minutes\"]\n", "b=val_[val_[\"position\"]==\"MID\"][\"predicted_minutes\"]\n", "print(confusion_matrix(a,b))\n", "print(\n", " f\"starting Accuracy score {accuracy_score(a,b)}\"\n", ")\n", "\n", "print(\n", " f\" starting f1 score: {f1_score(a,b)}\"\n", ")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OlE-qPpSEFOA", "outputId": "285f3d00-99b5-49f5-98c6-c06050571ecb" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "STARTING FORWARDS PERFORMANCE!!!\n", "[[473 142]\n", " [ 19 370]]\n", "starting Accuracy score 0.8396414342629482\n", " starting f1 score: 0.8213096559378469\n" ] } ], "source": [ "print(f\"STARTING FORWARDS PERFORMANCE!!!\")\n", "a=val_[val_[\"position\"]==\"FWD\"][\"actul_minutes\"]\n", "b=val_[val_[\"position\"]==\"FWD\"][\"predicted_minutes\"]\n", "print(confusion_matrix(a,b))\n", "print(\n", " f\"starting Accuracy score {accuracy_score(a,b)}\"\n", ")\n", "\n", "print(\n", " f\" starting f1 score: {f1_score(a,b)}\"\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QidLuN2qpwHI", "outputId": "1d9f4dad-5139-44c9-e662-0c950fc82fe9" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "MID 148\n", "DEF 104\n", "FWD 42\n", "GKP 22\n", "Name: position, dtype: int64" ] }, "metadata": {}, "execution_count": 24 } ], "source": [ "test_copy[test_copy[\"minutes\"]==1][\"position\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "id": "E5XuG1jhpt4s" }, "outputs": [], "source": [ "train = train_copy[train_copy[\"minutes\"] > 0]\n", "test = test_copy[test_copy[\"minutes\"] > 0]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "L5DlE5tLsWo8" }, "outputs": [], "source": [ "\n", "# predict points\n", "train[\"index\"] = train[\"name\"] + train[\"kickoff_time\"].astype(\"str\")\n", "train.drop_duplicates(\"index\", keep=\"last\", inplace=True)\n", "train= train.set_index(\"index\")\n", "train[\"date\"]=pd.to_datetime(train[\"kickoff_time\"])\n", "train[\"day_of week\"]=train[\"date\"].dt.day_name\n", "train[\"month\"]=train[\"date\"].dt.month\n", "train[\"hour\"]=train[\"date\"].dt.hour\n", "train[\"week\"]=train[\"date\"].dt.week\n", "train.drop([\"kickoff_time\",\"date\"], axis=1, inplace=True)\n", "\n", "test[\"index\"] = test[\"name\"] + test[\"kickoff_time\"].astype(\"str\")\n", "test = test.set_index(\"index\")\n", "test[\"date\"]=pd.to_datetime(test[\"kickoff_time\"])\n", "test[\"day_of week\"]=test[\"date\"].dt.day_name\n", "test[\"month\"]=test[\"date\"].dt.month\n", "test[\"hour\"]=test[\"date\"].dt.hour\n", "test[\"week\"]=test[\"date\"].dt.week\n", "test.drop([\"kickoff_time\",\"date\"], axis=1, inplace=True)\n", "\n", "target = train[[\"total_points\", \"GW\",\"position\" ]]\n", "train.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n", "test.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n", "train.drop(dropped_columns, axis=1, inplace=True)\n", "test.drop(dropped_columns, axis=1, inplace=True)\n", "\n", "\n", "for col in train.columns:\n", " if train[col].dtype == \"object\":\n", " if col not in [\"team\", \"name\",\"position\"]:\n", " train[col] = pd.factorize(train[col])[0]\n", " test[col] = pd.factorize(test[col])[0]\n", "\n", "train[\"position\"]=train[\"position\"].astype(\"category\")\n", "test[\"position\"]=test[\"position\"].astype(\"category\")\n", "train[\"was_home\"] = train[\"was_home\"].replace({True: 0, False: 1})\n", "test[\"was_home\"] = test[\"was_home\"].replace({True: 0, False: 1})\n", "\n", "train = train[statistics + leak_columns+date_cols]\n", "test= test[statistics + leak_columns+date_cols]\n", "\n", "x, val, y, y_val = train_test_split(\n", " train.drop(leak_columns, axis=1),\n", " target[\"total_points\"],\n", " test_size=0.1,\n", " random_state=0,\n", ")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xNZGBu2FhgFj", "outputId": "c2370e4b-a4cd-4551-b96a-a837458b120d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "[2.75799904510382, 2.6652997142930492, 2.6832936632856086, 2.8264025093811855, 2.8638436004667875, 2.7048447357279364, 2.7494556057432997, 2.6377462697198215]\n", "[1.5229543294014234, 1.9308316970996955, 1.892765585734737, 1.9056027047168056, 1.6943890843424334, 1.7993653432035002, 1.5870089580031228, 1.9578922425800178]\n" ] } ], "source": [ "from sklearn.model_selection import KFold\n", "\n", "#cross_validator to splite the data into folds\n", "folds=KFold(n_splits=8,shuffle=True,random_state=0)\n", "\n", "#a dataframe to store the predictions made by each fold\n", "predictions_df=pd.DataFrame()\n", "\n", "#list to save the mean absolute errors from validatingon each folds\n", "rmse_val=[]\n", "rmse_X=[]\n", "\n", "#a simple catboost regressor\n", "model=LGBMRegressor(**{'colsample_bytree': 0.4199299182268318, 'learning_rate': 0.0032874466037521254, 'max_depth': 9, 'min_split_gain': 0.5685369160138952, 'num_leaves': 99, 'reg_alpha': 0.5621526419488447, 'reg_lambda': 0, 'subsample': 0.6534153111773866}, verbose=-50,random_state=0,early_stopping_rounds=200,n_estimators=10000)\n", "\n", "#train model, make predictions and check the validation accuracy on each fold\n", "for i,(train_index,test_index) in enumerate(folds.split(train.drop(leak_columns, axis=1),target[\"total_points\"])):\n", " train_fold=train.drop(leak_columns, axis=1).iloc[train_index]\n", " val_fold=train.drop(leak_columns, axis=1).iloc[test_index]\n", " y_fold=target[\"total_points\"].iloc[train_index]\n", " y_val_fold=target[\"total_points\"].iloc[test_index]\n", "\n", "\n", " model.fit(train_fold,y_fold,eval_set=[(val_fold,y_val_fold)])\n", " print(i+1)\n", " prediction=model.predict(test.drop(leak_columns, axis=1))\n", " predictions_df[i]=prediction\n", " rmse_val.append(mean_squared_error(model.predict(val_fold),y_val_fold,squared=False))\n", " rmse_X.append(mean_squared_error(model.predict(train_fold),y_fold,squared=False))\n", "print(rmse_val)\n", "print(rmse_X)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "R6NBjO7yzPOT", "outputId": "aba2700f-3bf3-4d76-b98c-d83f602aba81" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2.7361106429651887\n", "1.786351243135217\n" ] } ], "source": [ "print(np.mean(rmse_val))\n", "print(np.mean(rmse_X))\n", "#2.7575293874474336\n", "#2.0174134721607295" ] }, { "cell_type": "markdown", "metadata": { "id": "475Nb44Amx4V" }, "source": [ "2.7320675301713573\n", "1.6917027774360753" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-kDj3_xeq8Ia", "outputId": "be804606-b981-46e4-e9b3-2d50216e1b75" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 2.924173\n", "1 2.924595\n", "2 2.814342\n", "3 2.395066\n", "4 3.853160\n", " ... \n", "311 2.302227\n", "312 3.181070\n", "313 2.069684\n", "314 1.178730\n", "315 2.053070\n", "Name: 0, Length: 316, dtype: float64" ] }, "metadata": {}, "execution_count": 29 } ], "source": [ "predictions_df[0]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "id": "W_b08Rfnp76c" }, "outputs": [], "source": [ "test[\"points\"]=np.mean(predictions_df, axis=1).values\n", "\n", "test[leak_columns + [\"points\", \"value\"]].sort_values(\n", " \"points\", ascending=False\n", ").to_csv(\"points.csv\")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 425 }, "id": "g-ibL8BVAR2e", "outputId": "c58bf3f6-fd8b-4fa9-8505-8047b5417e6e" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name \\\n", "index \n", "Mohamed Salah2023-10-29T14:00:00Z Mohamed Salah \n", "Martin Ødegaard2023-10-28T14:00:00Z Martin Ødegaard \n", "Bukayo Saka2023-10-28T14:00:00Z Bukayo Saka \n", "Son Heung-min2023-10-27T19:00:00Z Son Heung-min \n", "Gabriel Martinelli Silva2023-10-28T14:00:00Z Gabriel Martinelli Silva \n", "James Maddison2023-10-27T19:00:00Z James Maddison \n", "Kaoru Mitoma2023-10-29T14:00:00Z Kaoru Mitoma \n", "Bryan Mbeumo2023-10-28T11:30:00Z Bryan Mbeumo \n", "Diogo Teixeira da Silva2023-10-29T14:00:00Z Diogo Teixeira da Silva \n", "Jarrod Bowen2023-10-29T13:00:00Z Jarrod Bowen \n", "Dominik Szoboszlai2023-10-29T14:00:00Z Dominik Szoboszlai \n", "\n", " points team \n", "index \n", "Mohamed Salah2023-10-29T14:00:00Z 8.735140 Liverpool \n", "Martin Ødegaard2023-10-28T14:00:00Z 6.533175 Arsenal \n", "Bukayo Saka2023-10-28T14:00:00Z 5.956754 Arsenal \n", "Son Heung-min2023-10-27T19:00:00Z 5.859988 Spurs \n", "Gabriel Martinelli Silva2023-10-28T14:00:00Z 5.840069 Arsenal \n", "James Maddison2023-10-27T19:00:00Z 5.806873 Spurs \n", "Kaoru Mitoma2023-10-29T14:00:00Z 5.523091 Brighton \n", "Bryan Mbeumo2023-10-28T11:30:00Z 4.641319 Brentford \n", "Diogo Teixeira da Silva2023-10-29T14:00:00Z 4.614422 Liverpool \n", "Jarrod Bowen2023-10-29T13:00:00Z 4.555775 West Ham \n", "Dominik Szoboszlai2023-10-29T14:00:00Z 4.537633 Liverpool " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepointsteam
index
Mohamed Salah2023-10-29T14:00:00ZMohamed Salah8.735140Liverpool
Martin Ødegaard2023-10-28T14:00:00ZMartin Ødegaard6.533175Arsenal
Bukayo Saka2023-10-28T14:00:00ZBukayo Saka5.956754Arsenal
Son Heung-min2023-10-27T19:00:00ZSon Heung-min5.859988Spurs
Gabriel Martinelli Silva2023-10-28T14:00:00ZGabriel Martinelli Silva5.840069Arsenal
James Maddison2023-10-27T19:00:00ZJames Maddison5.806873Spurs
Kaoru Mitoma2023-10-29T14:00:00ZKaoru Mitoma5.523091Brighton
Bryan Mbeumo2023-10-28T11:30:00ZBryan Mbeumo4.641319Brentford
Diogo Teixeira da Silva2023-10-29T14:00:00ZDiogo Teixeira da Silva4.614422Liverpool
Jarrod Bowen2023-10-29T13:00:00ZJarrod Bowen4.555775West Ham
Dominik Szoboszlai2023-10-29T14:00:00ZDominik Szoboszlai4.537633Liverpool
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 31 } ], "source": [ "test[test[\"position\"]==\"MID\"].sort_values(by=\"points\",ascending=False).head(11)[[\"name\",\"points\",\"team\"]]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 394 }, "id": "R9-XbtdrAeyB", "outputId": "b6433746-a570-44ea-83ff-cb49732543c9" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name points \\\n", "index \n", "Kieran Trippier2023-10-28T16:30:00Z Kieran Trippier 6.195080 \n", "Trent Alexander-Arnold2023-10-29T14:00:00Z Trent Alexander-Arnold 5.014530 \n", "Lucas Digne2023-10-29T14:00:00Z Lucas Digne 4.710693 \n", "Pau Torres2023-10-29T14:00:00Z Pau Torres 4.661043 \n", "Ezri Konsa Ngoyo2023-10-29T14:00:00Z Ezri Konsa Ngoyo 4.535804 \n", "Andrew Robertson2023-10-29T14:00:00Z Andrew Robertson 4.213994 \n", "Matty Cash2023-10-29T14:00:00Z Matty Cash 4.123158 \n", "Vladimír Coufal2023-10-29T13:00:00Z Vladimír Coufal 3.972972 \n", "Cristian Romero2023-10-27T19:00:00Z Cristian Romero 3.945540 \n", "Joël Veltman2023-10-29T14:00:00Z Joël Veltman 3.920138 \n", "\n", " team \n", "index \n", "Kieran Trippier2023-10-28T16:30:00Z Newcastle \n", "Trent Alexander-Arnold2023-10-29T14:00:00Z Liverpool \n", "Lucas Digne2023-10-29T14:00:00Z Aston Villa \n", "Pau Torres2023-10-29T14:00:00Z Aston Villa \n", "Ezri Konsa Ngoyo2023-10-29T14:00:00Z Aston Villa \n", "Andrew Robertson2023-10-29T14:00:00Z Liverpool \n", "Matty Cash2023-10-29T14:00:00Z Aston Villa \n", "Vladimír Coufal2023-10-29T13:00:00Z West Ham \n", "Cristian Romero2023-10-27T19:00:00Z Spurs \n", "Joël Veltman2023-10-29T14:00:00Z Brighton " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepointsteam
index
Kieran Trippier2023-10-28T16:30:00ZKieran Trippier6.195080Newcastle
Trent Alexander-Arnold2023-10-29T14:00:00ZTrent Alexander-Arnold5.014530Liverpool
Lucas Digne2023-10-29T14:00:00ZLucas Digne4.710693Aston Villa
Pau Torres2023-10-29T14:00:00ZPau Torres4.661043Aston Villa
Ezri Konsa Ngoyo2023-10-29T14:00:00ZEzri Konsa Ngoyo4.535804Aston Villa
Andrew Robertson2023-10-29T14:00:00ZAndrew Robertson4.213994Liverpool
Matty Cash2023-10-29T14:00:00ZMatty Cash4.123158Aston Villa
Vladimír Coufal2023-10-29T13:00:00ZVladimír Coufal3.972972West Ham
Cristian Romero2023-10-27T19:00:00ZCristian Romero3.945540Spurs
Joël Veltman2023-10-29T14:00:00ZJoël Veltman3.920138Brighton
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 32 } ], "source": [ "test[test[\"position\"]==\"DEF\"].sort_values(by=\"points\",ascending=False).head(10)[[\"name\",\"points\",\"team\"]]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 394 }, "id": "_b9fjvegAsG7", "outputId": "7073a71f-7627-4ab9-ad66-b2a878dca5cb" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name \\\n", "index \n", "Emiliano Martínez Romero2023-10-29T14:00:00Z Emiliano Martínez Romero \n", "David Raya Martin2023-10-28T14:00:00Z David Raya Martin \n", "Alisson Ramses Becker2023-10-29T14:00:00Z Alisson Ramses Becker \n", "Norberto Murara Neto2023-10-28T14:00:00Z Norberto Murara Neto \n", "Ederson Santana de Moraes2023-10-29T15:30:00Z Ederson Santana de Moraes \n", "Alphonse Areola2023-10-29T13:00:00Z Alphonse Areola \n", "Jason Steele2023-10-29T14:00:00Z Jason Steele \n", "Guglielmo Vicario2023-10-27T19:00:00Z Guglielmo Vicario \n", "Mark Flekken2023-10-28T11:30:00Z Mark Flekken \n", "Nick Pope2023-10-28T16:30:00Z Nick Pope \n", "\n", " points team \n", "index \n", "Emiliano Martínez Romero2023-10-29T14:00:00Z 4.385442 Aston Villa \n", "David Raya Martin2023-10-28T14:00:00Z 4.204943 Arsenal \n", "Alisson Ramses Becker2023-10-29T14:00:00Z 3.967894 Liverpool \n", "Norberto Murara Neto2023-10-28T14:00:00Z 3.812593 Bournemouth \n", "Ederson Santana de Moraes2023-10-29T15:30:00Z 3.605701 Man City \n", "Alphonse Areola2023-10-29T13:00:00Z 3.600888 West Ham \n", "Jason Steele2023-10-29T14:00:00Z 3.504677 Brighton \n", "Guglielmo Vicario2023-10-27T19:00:00Z 3.356717 Spurs \n", "Mark Flekken2023-10-28T11:30:00Z 3.235551 Brentford \n", "Nick Pope2023-10-28T16:30:00Z 3.030811 Newcastle " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepointsteam
index
Emiliano Martínez Romero2023-10-29T14:00:00ZEmiliano Martínez Romero4.385442Aston Villa
David Raya Martin2023-10-28T14:00:00ZDavid Raya Martin4.204943Arsenal
Alisson Ramses Becker2023-10-29T14:00:00ZAlisson Ramses Becker3.967894Liverpool
Norberto Murara Neto2023-10-28T14:00:00ZNorberto Murara Neto3.812593Bournemouth
Ederson Santana de Moraes2023-10-29T15:30:00ZEderson Santana de Moraes3.605701Man City
Alphonse Areola2023-10-29T13:00:00ZAlphonse Areola3.600888West Ham
Jason Steele2023-10-29T14:00:00ZJason Steele3.504677Brighton
Guglielmo Vicario2023-10-27T19:00:00ZGuglielmo Vicario3.356717Spurs
Mark Flekken2023-10-28T11:30:00ZMark Flekken3.235551Brentford
Nick Pope2023-10-28T16:30:00ZNick Pope3.030811Newcastle
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 33 } ], "source": [ "test[test[\"position\"]==\"GKP\"].sort_values(by=\"points\",ascending=False).head(10)[[\"name\",\"points\",\"team\"]]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 394 }, "id": "DhgfhH7AgUW3", "outputId": "38d4b894-d287-4898-d483-eb736b280def" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name \\\n", "index \n", "Erling Haaland2023-10-29T15:30:00Z Erling Haaland \n", "Ollie Watkins2023-10-29T14:00:00Z Ollie Watkins \n", "Dominic Solanke2023-10-28T14:00:00Z Dominic Solanke \n", "Gabriel Fernando de Jesus2023-10-28T14:00:00Z Gabriel Fernando de Jesus \n", "Darwin Núñez Ribeiro2023-10-29T14:00:00Z Darwin Núñez Ribeiro \n", "Julián Álvarez2023-10-29T15:30:00Z Julián Álvarez \n", "Yoane Wissa2023-10-28T11:30:00Z Yoane Wissa \n", "João Pedro Junqueira de Jesus2023-10-29T14:00:00Z João Pedro Junqueira de Jesus \n", "Callum Wilson2023-10-28T16:30:00Z Callum Wilson \n", "Cody Gakpo2023-10-29T14:00:00Z Cody Gakpo \n", "\n", " points team \n", "index \n", "Erling Haaland2023-10-29T15:30:00Z 6.836103 Man City \n", "Ollie Watkins2023-10-29T14:00:00Z 5.577515 Aston Villa \n", "Dominic Solanke2023-10-28T14:00:00Z 5.046332 Bournemouth \n", "Gabriel Fernando de Jesus2023-10-28T14:00:00Z 4.544171 Arsenal \n", "Darwin Núñez Ribeiro2023-10-29T14:00:00Z 4.181486 Liverpool \n", "Julián Álvarez2023-10-29T15:30:00Z 3.875503 Man City \n", "Yoane Wissa2023-10-28T11:30:00Z 3.780475 Brentford \n", "João Pedro Junqueira de Jesus2023-10-29T14:00:00Z 3.535017 Brighton \n", "Callum Wilson2023-10-28T16:30:00Z 3.529454 Newcastle \n", "Cody Gakpo2023-10-29T14:00:00Z 3.388202 Liverpool " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepointsteam
index
Erling Haaland2023-10-29T15:30:00ZErling Haaland6.836103Man City
Ollie Watkins2023-10-29T14:00:00ZOllie Watkins5.577515Aston Villa
Dominic Solanke2023-10-28T14:00:00ZDominic Solanke5.046332Bournemouth
Gabriel Fernando de Jesus2023-10-28T14:00:00ZGabriel Fernando de Jesus4.544171Arsenal
Darwin Núñez Ribeiro2023-10-29T14:00:00ZDarwin Núñez Ribeiro4.181486Liverpool
Julián Álvarez2023-10-29T15:30:00ZJulián Álvarez3.875503Man City
Yoane Wissa2023-10-28T11:30:00ZYoane Wissa3.780475Brentford
João Pedro Junqueira de Jesus2023-10-29T14:00:00ZJoão Pedro Junqueira de Jesus3.535017Brighton
Callum Wilson2023-10-28T16:30:00ZCallum Wilson3.529454Newcastle
Cody Gakpo2023-10-29T14:00:00ZCody Gakpo3.388202Liverpool
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 34 } ], "source": [ "test[test[\"position\"]==\"FWD\"].sort_values(by=\"points\",ascending=False).head(10)[[\"name\",\"points\",\"team\"]]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UCBjSKR_uf6m", "outputId": "05cf4abf-4d6c-4938-abfa-61aa0cebd817" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "index\n", "Mohamed Salah2023-10-29T14:00:00Z 8.735140\n", "Erling Haaland2023-10-29T15:30:00Z 6.836103\n", "Martin Ødegaard2023-10-28T14:00:00Z 6.533175\n", "Kieran Trippier2023-10-28T16:30:00Z 6.195080\n", "Bukayo Saka2023-10-28T14:00:00Z 5.956754\n", "Son Heung-min2023-10-27T19:00:00Z 5.859988\n", "Gabriel Martinelli Silva2023-10-28T14:00:00Z 5.840069\n", "James Maddison2023-10-27T19:00:00Z 5.806873\n", "Ollie Watkins2023-10-29T14:00:00Z 5.577515\n", "Kaoru Mitoma2023-10-29T14:00:00Z 5.523091\n", "Dominic Solanke2023-10-28T14:00:00Z 5.046332\n", "Trent Alexander-Arnold2023-10-29T14:00:00Z 5.014530\n", "Lucas Digne2023-10-29T14:00:00Z 4.710693\n", "Pau Torres2023-10-29T14:00:00Z 4.661043\n", "Bryan Mbeumo2023-10-28T11:30:00Z 4.641319\n", "Diogo Teixeira da Silva2023-10-29T14:00:00Z 4.614422\n", "Jarrod Bowen2023-10-29T13:00:00Z 4.555775\n", "Gabriel Fernando de Jesus2023-10-28T14:00:00Z 4.544171\n", "Dominik Szoboszlai2023-10-29T14:00:00Z 4.537633\n", "Ezri Konsa Ngoyo2023-10-29T14:00:00Z 4.535804\n", "Moussa Diaby2023-10-29T14:00:00Z 4.516049\n", "Emiliano Martínez Romero2023-10-29T14:00:00Z 4.385442\n", "Dejan Kulusevski2023-10-27T19:00:00Z 4.312208\n", "Douglas Luiz Soares de Paulo2023-10-29T14:00:00Z 4.309176\n", "John McGinn2023-10-29T14:00:00Z 4.303202\n", "Pascal Groß2023-10-29T14:00:00Z 4.245943\n", "Phil Foden2023-10-29T15:30:00Z 4.234619\n", "Andrew Robertson2023-10-29T14:00:00Z 4.213994\n", "David Raya Martin2023-10-28T14:00:00Z 4.204943\n", "Darwin Núñez Ribeiro2023-10-29T14:00:00Z 4.181486\n", "James Ward-Prowse2023-10-29T13:00:00Z 4.148899\n", "Kai Havertz2023-10-28T14:00:00Z 4.130318\n", "Matty Cash2023-10-29T14:00:00Z 4.123158\n", "Tomáš Souček2023-10-29T13:00:00Z 4.102016\n", "Declan Rice2023-10-28T14:00:00Z 4.045454\n", "Abdoulaye Doucouré2023-10-29T13:00:00Z 4.026932\n", "Raheem Sterling2023-10-28T11:30:00Z 4.004489\n", "Vladimír Coufal2023-10-29T13:00:00Z 3.972972\n", "Alisson Ramses Becker2023-10-29T14:00:00Z 3.967894\n", "Cristian Romero2023-10-27T19:00:00Z 3.945540\n", "Mathias Jensen2023-10-28T11:30:00Z 3.935375\n", "Joël Veltman2023-10-29T14:00:00Z 3.920138\n", "Benjamin White2023-10-28T14:00:00Z 3.919942\n", "Gabriel dos Santos Magalhães2023-10-28T14:00:00Z 3.916704\n", "Julián Álvarez2023-10-29T15:30:00Z 3.875503\n", "Kurt Zouma2023-10-29T13:00:00Z 3.830288\n", "Leandro Trossard2023-10-28T14:00:00Z 3.824626\n", "Norberto Murara Neto2023-10-28T14:00:00Z 3.812593\n", "Virgil van Dijk2023-10-29T14:00:00Z 3.812125\n", "Bruno Borges Fernandes2023-10-29T15:30:00Z 3.783217\n", "Name: points, dtype: float64" ] }, "metadata": {}, "execution_count": 35 } ], "source": [ "test[\"points\"].sort_values(ascending=False).head(50)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "id": "TEVZdPSFrxKn" }, "outputs": [], "source": [ "\n", "feature_importance = pd.DataFrame(\n", " {\"column\": x.columns, \"imp\": model.feature_importances_}\n", ").sort_values(\n", " \"imp\", ascending=False\n", ") #" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "cp1yUjTNvJnk", "outputId": "76a00ccf-df81-46e9-b3bc-58527fbefefb" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " column imp\n", "71 opp mean team Goal scored all 13260\n", "72 opp mean team Goal conceded all 12240\n", "73 opp mean match_result all 12043\n", "64 mean team Goal scored all 10330\n", "62 mean match_result all 10148\n", "66 mean team Goal conceded all 9773\n", "4 percent_value 9079\n", "30 mean influence 3 7914\n", "41 std influence 3 7710\n", "40 std ict_index 3 7637\n", "39 std creativity 3 7578\n", "52 mean goals_conceded all 7534\n", "38 std bps 3 7484\n", "43 std threat 3 7460\n", "26 mean creativity 3 7418\n", "21 opponent_last_season_position 7306\n", "29 mean ict_index 3 7171\n", "77 week 6988\n", "24 mean bps 3 6928\n", "20 GW 6841\n", "55 mean influence all 6814\n", "59 mean threat all 6740\n", "34 mean threat 3 6733\n", "50 mean clean_sheets all 6698\n", "51 mean creativity all 6290\n", "54 mean ict_index all 6284\n", "49 mean bps all 6214\n", "60 mean total_points all 6188\n", "48 mean bonus all 6105\n", "70 opp mean team Goal conceded 3 6060\n", "69 opp mean team Goal scored 3 5964\n", "56 mean minutes all 5962\n", "61 mean value all 5595\n", "31 mean minutes 3 5563\n", "3 last_season_position 5504\n", "63 mean team Goal scored 3 5263\n", "76 hour 5206\n", "36 mean value 3 5156\n", "44 std total_points 3 5131\n", "11 creativity_ex 5055\n", "67 ratio_goal_scored all 4929\n", "19 now_cost_ex 4865\n", "9 minutes_ex 4816\n", "10 goals_conceded_ex 4778\n", "15 bps_ex 4742\n", "42 std minutes 3 4680\n", "0 value 4660\n", "8 total_points_ex 4627\n", "47 mean assists all 4606\n", "13 threat_ex 4594" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
columnimp
71opp mean team Goal scored all13260
72opp mean team Goal conceded all12240
73opp mean match_result all12043
64mean team Goal scored all10330
62mean match_result all10148
66mean team Goal conceded all9773
4percent_value9079
30mean influence 37914
41std influence 37710
40std ict_index 37637
39std creativity 37578
52mean goals_conceded all7534
38std bps 37484
43std threat 37460
26mean creativity 37418
21opponent_last_season_position7306
29mean ict_index 37171
77week6988
24mean bps 36928
20GW6841
55mean influence all6814
59mean threat all6740
34mean threat 36733
50mean clean_sheets all6698
51mean creativity all6290
54mean ict_index all6284
49mean bps all6214
60mean total_points all6188
48mean bonus all6105
70opp mean team Goal conceded 36060
69opp mean team Goal scored 35964
56mean minutes all5962
61mean value all5595
31mean minutes 35563
3last_season_position5504
63mean team Goal scored 35263
76hour5206
36mean value 35156
44std total_points 35131
11creativity_ex5055
67ratio_goal_scored all4929
19now_cost_ex4865
9minutes_ex4816
10goals_conceded_ex4778
15bps_ex4742
42std minutes 34680
0value4660
8total_points_ex4627
47mean assists all4606
13threat_ex4594
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 37 } ], "source": [ "feature_importance.head(50)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 990 }, "id": "oB09ezWX0Q6H", "outputId": "4aeae781-1788-4dcc-feaa-c9577d8aaea0" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " column imp\n", "47 mean assists all 4606\n", "13 threat_ex 4594\n", "12 influence_ex 4489\n", "35 mean total_points 3 4473\n", "16 ict_index_ex 4352\n", "65 mean team Goal conceded 3 4266\n", "53 mean goals_scored all 3896\n", "37 mean match_result 3 3873\n", "27 mean goals_conceded 3 3276\n", "14 bonus_ex 3126\n", "5 position rank 3105\n", "17 clean_sheets_ex 3048\n", "18 yellow_cards_ex 2859\n", "75 month 2754\n", "7 assists_ex 2454\n", "68 ratio_goal_scored 3 2304\n", "1 position 2169\n", "6 goals_scored_ex 2164\n", "2 was_home 1642\n", "25 mean clean_sheets 3 1341\n", "46 mean saves 3 1020\n", "22 mean assists 3 1001\n", "23 mean bonus 3 994\n", "45 std value 3 971\n", "57 mean own_goals all 933\n", "58 mean red_cards all 778\n", "28 mean goals_scored 3 329\n", "32 mean own_goals 3 8\n", "33 mean red_cards 3 0\n", "74 day_of week 0" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
columnimp
47mean assists all4606
13threat_ex4594
12influence_ex4489
35mean total_points 34473
16ict_index_ex4352
65mean team Goal conceded 34266
53mean goals_scored all3896
37mean match_result 33873
27mean goals_conceded 33276
14bonus_ex3126
5position rank3105
17clean_sheets_ex3048
18yellow_cards_ex2859
75month2754
7assists_ex2454
68ratio_goal_scored 32304
1position2169
6goals_scored_ex2164
2was_home1642
25mean clean_sheets 31341
46mean saves 31020
22mean assists 31001
23mean bonus 3994
45std value 3971
57mean own_goals all933
58mean red_cards all778
28mean goals_scored 3329
32mean own_goals 38
33mean red_cards 30
74day_of week0
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 38 } ], "source": [ "feature_importance.tail(30)" ] }, { "cell_type": "markdown", "metadata": { "id": "GBl8UYussJIl" }, "source": [ "#Save predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "HRm_1_f0sb1d" }, "outputs": [], "source": [ "test_copy[test_copy[\"position\"]==\"DEF\"][[\"name\",\"team\",\"minutes\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/defenders_minutes.csv\")\n", "test_copy[test_copy[\"position\"]==\"GKP\"][[\"name\",\"team\",\"minutes\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/goalkeepers_minutes.csv\")\n", "test_copy[test_copy[\"position\"]==\"MID\"][[\"name\",\"team\",\"minutes\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/midfielders_minutes.csv\")\n", "test_copy[test_copy[\"position\"]==\"FWD\"][[\"name\",\"team\",\"minutes\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/forwards_minutes.csv\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "jGCIVTg1uCs3" }, "outputs": [], "source": [ "test[test[\"position\"]==\"DEF\"][[\"name\",\"team\",\"points\",\"value\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/defenders_points.csv\")\n", "test[test[\"position\"]==\"GKP\"][[\"name\",\"team\",\"points\",\"value\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/goalkeepers_points.csv\")\n", "test[test[\"position\"]==\"MID\"][[\"name\",\"team\",\"points\",\"value\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/midfielders_points.csv\")\n", "test[test[\"position\"]==\"FWD\"][[\"name\",\"team\",\"points\",\"value\"]].to_csv(f\"/content/drive/MyDrive/Fplpredict /predicted_dataset/GW{gameweek}/forwards_points.csv\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "G_OjzKsigsv_" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [], "mount_file_id": "1aEyJ1htjtEffPwyNX1iNXKt1AcmE_aIT", "authorship_tag": "ABX9TyPQfJgzw5l5i50tJbZklE/V", "include_colab_link": true }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }