{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from random import random\n", "import pandas as pd\n", "import numpy as np\n", "import tensorflow as tf\n", "from tensorflow.contrib.learn.python.learn.datasets import base\n", "\n", "import sys\n", "sys.path.append(\"..\")\n", "from influence.dataset import DataSet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data preparation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpath_titanic = \"/home/eolus/Desktop/Dauphine/datamining/projets/blackBox/data/train.csv\"\n", "data_df = pd.read_csv(fpath_titanic)\n", "data_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def extract_prefix(name):\n", " import re\n", " try:\n", " return re.search('(Mr\\.)|(Mrs\\.)|(Miss\\.)', name).group()\n", " except:\n", " return \"\"\n", "\n", "# Add prefix feature\n", "data_df['Prefix'] = data_df.Name.apply(extract_prefix)\n", "\n", "# Convert to categorical type\n", "for cat_col in ['Sex', 'Embarked', 'Prefix' ]:\n", " data_df[cat_col] = pd.factorize(data_df[cat_col])[0]\n", "\n", "# Fill NA\n", "data_df['Age'].fillna(data_df.Age.mean(), inplace=True)\n", "\n", "# Shuffle\n", "data_df = data_df.sample(frac=1)\n", "\n", "# Get train dataset and keep track of passengerIDs\n", "train_df = data_df.iloc[:int(0.7*len(data_df))]\n", "test_df = data_df.iloc[int(0.7*len(data_df)):]\n", "\n", "train_idx = data_df.iloc[:int(0.7*len(data_df))]\n", " \n", "# Prepare X, y\n", "columns = ['Prefix', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']\n", "X_train = np.array(train_df[columns])\n", "X_test = np.array(test_df[columns])\n", "y_train = np.array((train_df.Survived > 0).astype('int32'))\n", "y_test = np.array((test_df.Survived > 0).astype('int32'))\n", "\n", "# Retrieve PassengerId\n", "train_idx = train_df.PassengerId.values.tolist()\n", "test_idx = test_df.PassengerId.values.tolist()\n", "\n", "# Scale X\n", "from sklearn.preprocessing import StandardScaler\n", "scaler = StandardScaler()\n", "X_train = scaler.fit_transform(X_train)\n", "X_test = scaler.fit_transform(X_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_tf_model(X_train, X_test, y_train, y_test):\n", " \"\"\"\n", " Fit BinaryLogisticRegressionWithLBFGS object using training data X_train, y_train\n", " X_test and y_test readily available in object attributes for eval\n", " \"\"\"\n", " \n", " from influence.binaryLogisticRegressionWithLBFGS import BinaryLogisticRegressionWithLBFGS\n", " \n", " # Create dataset\n", " lr_train = DataSet(X_train, np.array(y_train, dtype=int))\n", " lr_test = DataSet(X_test, np.array(y_test, dtype=int))\n", " lr_validation = None\n", " lr_data_sets = base.Datasets(train=lr_train, validation=lr_validation, test=lr_test)\n", "\n", " # Params\n", " num_classes = 2\n", " input_dim = len(X_test.T)\n", " weight_decay = 0.01\n", " batch_size = 100\n", " initial_learning_rate = 0.001 \n", " keep_probs = None\n", " decay_epochs = [1000, 10000]\n", " max_lbfgs_iter = 1000\n", "\n", " # Ini new graph\n", " with tf.Graph().as_default(): # Replaces tf.reset_default_graph() in jupyter env\n", "\n", " tf_model = BinaryLogisticRegressionWithLBFGS(\n", " input_dim=input_dim,\n", " weight_decay=weight_decay,\n", " max_lbfgs_iter=max_lbfgs_iter,\n", " num_classes=num_classes, \n", " batch_size=batch_size,\n", " data_sets=lr_data_sets,\n", " initial_learning_rate=initial_learning_rate,\n", " keep_probs=keep_probs,\n", " decay_epochs=decay_epochs,\n", " mini_batch=False,\n", " train_dir='tmp',\n", " log_dir='tmp',\n", " model_name='titanic')\n", " \n", " # Fit model and return\n", " tf_model.train()\n", " return tf_model\n", "\n", "\n", "def get_top_train_influence():\n", " \"\"\"\n", " Approximate most influential train points for a test point\n", " idx : index of test point\n", " \"\"\"\n", " tf_model = get_tf_model(X_train, X_test, y_train, y_test)\n", " \n", " num_train = len(tf_model.data_sets.train.labels)\n", " influences = tf_model.get_influence_on_test_loss(\n", " np.arange(len(tf_model.data_sets.test.labels)), \n", " np.arange(len(tf_model.data_sets.train.labels)),\n", " force_refresh=True) * num_train\n", " influences_sorted = sorted(enumerate(influences),\n", " key=lambda x:x[1],\n", " reverse=True)\n", " return influences_sorted\n", "\n", "\n", "def get_pred_score(tf_model):\n", " preds_p = tf_model.get_preds().tolist()\n", " preds = [1 if el[0] < 0.5 else 0 for el in preds_p]\n", " ref = tf_model.data_sets.test.labels\n", "\n", " same = 0\n", " for test_point in zip(preds, ref):\n", " if test_point[0] == test_point[1]:\n", " same += 1\n", " score = same / len(ref)\n", " return score" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_model(X_train, y_train):\n", " from sklearn.linear_model import LogisticRegressionCV\n", " logit = LogisticRegressionCV()\n", " logit.fit(X_train, y_train)\n", " return logit\n", "\n", "def get_score(X_test, y_test, X_train=None, y_train=None, model=None):\n", " if model is None:\n", " model = get_model(X_train, y_train)\n", " return model.score(X_test, y_test)\n", "\n", "def get_predictions(X_test, y_test, X_train=None, y_train=None, model=None):\n", " if model is None:\n", " model = get_model(X_train, y_train)\n", " return model.predictions(X_test, y_test)\n", "\n", "\n", "def get_scores(train_idx):\n", " \n", " # Get prediction score when using increasing fraction of training point starting from most influential\n", " scores = []\n", " for i in range(5, 105, 5):\n", " max_idx = int(i*len(train_idx)/100)\n", " sample_idx = train_idx[:max_idx]\n", " \n", " model = get_model(X_train=X_train[sample_idx], y_train=y_train[sample_idx])\n", " \n", " score = np.mean([\n", " get_score(X_test=X_test,\n", " y_test=y_test,\n", " model=model) for _ in range(3)])\n", "\n", " scores.append((i, score))\n", " \n", " return scores" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "def plot(x, y, label):\n", " plt.plot( x, y, '-', label=label)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /home/eolus/.local/lib/python3.6/site-packages/tensorflow/python/ops/nn_grad.py:249: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "keep_dims is deprecated, use keepdims instead\n", "Total number of parameters: 8\n", "Using normal model\n", "LBFGS training took [11] iter.\n", "After training with LBFGS: \n", "Optimization terminated successfully.\n", " Current function value: -0.011656\n", " Iterations: 5\n", " Function evaluations: 6\n", " Gradient evaluations: 10\n", " Hessian evaluations: 19\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xd4VGX2wPHvSUhICJBKTQIJSA+Q\nQAhNXZQFsSLoqiwqWNeCupbdteNadnXXH6urrIJ9XRVQESOiKFYUBUKH0EJNQguppJd5f3/cIQyh\nZEgmmUnmfJ5nnmTu3HvnzExy7jtvFWMMSimlvIOPuwNQSinVeDTpK6WUF9Gkr5RSXkSTvlJKeRFN\n+kop5UU06SullBfRpK+UUl5Ek75SSnkRTfpKKeVFWrg7gJoiIiJMTEyMu8NQSqkmZdWqVYeNMe1q\n28/jkn5MTAwpKSnuDkMppZoUEdnjzH5OVe+IyDgR2SoiaSLy4Eke7yIi34nIGhFZLyIXOTz2kP24\nrSJygfMvQSmllKvVWtIXEV9gJjAGyABWikiyMSbVYbdHgXnGmFdEpC+wCIix/34N0A/oDCwRkZ7G\nmCpXvxCllFK1c6aknwSkGWN2GmPKgTnA+Br7GKCt/fdgYJ/99/HAHGNMmTFmF5BmP59SSik3cCbp\nRwLpDvcz7NscPQFcKyIZWKX8u87gWETkVhFJEZGUrKwsJ0NXSil1plzVZXMS8LYxJgq4CHhXRJw+\ntzFmtjEm0RiT2K5drY3PSiml6siZ3juZQLTD/Sj7Nkc3AeMAjDG/iEgAEOHksUoppRqJM6XxlUAP\nEYkVEX+shtnkGvvsBUYDiEgfIADIsu93jYi0FJFYoAewwlXBK6WUOjO1lvSNMZUiMg1YDPgCbxpj\nNonIk0CKMSYZuB94TUTuxWrUnWqsdRg3icg8IBWoBO7UnjvK09hshtT9BazYlUNIKz+Gdw+nU3Bg\noz7/1oNH+HVnNrlF5fU6V3Arf6JDA4kOa0VUaCBtAvxcFGUDKzsCqclQkguhXSGkq/UzILhBnzY/\nfy+Z+1eTeTiVfXk7iZWWnBvYue4n7H4+dB3uugAbgHjaGrmJiYlGB2ephmSMYUdWIct2ZLMsLZtf\nd2WTV1xx3D6xEUEM7x7OiO7hDOsWTkTrli59/p2Hi1i2I5tfdhzmlx3Z5NqfX6Q+5z1xW0grP6JD\nWxEdFkh0aCuiwlpVXxQiQwIJ8POt+xPWlzGw91dY8z/Y9AlUFJ24T0AIhHRxuBDEHLsghHQBv9Nf\nnIuLD7Nv/2oyszaSkZtGZmEmmaWH2VdZSCZVHPE58Q0fXVTMw9m5tK+ynekLghaB8IcfoV3PMzy2\n/kRklTEmsdb9NOl7hvziCpZsPki7Ni1JjAmllb/HDZZusowxpOeUsGzHYSvR7swm60gZAJEhgccl\n99zicn7Zkc2yHdms2JVDYVklAL06tKneb2i3cIIDTyxBL9u3jNLKUhI7JtLWv+1xj6XnFPOL/bmX\n7TjMwQLr+TsFBzCiewTDu4czvHs4kSF1/4ZhjCGvuIL03GLSc0rsP4tJzy0hI6eYjNwSymsksg5t\nWxIVeuxCYF0YrAtEp+AAWvg61x/DGMPP+35GEIZ1Goavz2kuJkcOwroPrGSfvR38W0O/CTDoegg/\nC/L2QO4eh597rd/z9kJl6fHnat0BQrpSERLNjwF+bCzLJrMki8yKAjKoIKdGUg+wGTrjQ6RvKyJb\nhhHVOpLI4G5EtutLxw4DmZ/+Da+sewV/H3/uS7yPiT0m4uNsn5QjB+A/wyEkGm5aAi38nTvORTTp\nNwE2m+GXndnMXZnOl5sOUF5p/UP6+QoJ0aHViSChSwgtW7ixRNYEHcgv5Zedh1mWZiXwzLwSANq1\nacnwblbyHtE9guiwQOQUxevKKhsbMvPtJfJsVu7OoazSho9AXGSw9fl0C2dAdBD/XvtPPt7+MQA+\n4kOP4F6094ujtCCWbekRZGRbn214kL/94hHBiO7hdA1vdcrndzWbzXDoSNmxi4HDhSEjt4T9+SXY\nHNKBr4/QOSTA+qYQalUXRYcd+9bQro317Wdp5lJeXvMym3M2A9C+VXvGdx/PhLMmEN3W3o+jqhK2\nfwVr3oVti8FUQfQwGHQd9L0cWrZ25gVA0aHjLghp2ZuYX7CNheYIuT7Qwhg62oRI3wCi/EOJDOpI\nZHAskeF9iOwYT3hYT8Tn9El8T8Ee/vrLX1l5YCVDOg5h+vDpdG3b1bk3efNCmDsZzr4XfvuEc8e4\niCZ9D7Yvr4QPUzL4cFU6GbkltA1oweUJkUwcFEV+SQXLdhzm1x3ZbMjMx2YgwM+HxK5h1SXN/pHB\nTpfAmjtjDFn2RLYnu5hVe3L5ZUc2Ow9bVQXBgX5Wkj/Leu+6t2td5yRbVlnFmr15Vol9RzZr0nOp\n9MmmVdR7+ARkEhc0gRDiWJ2VQoHZgm+rvYhUIfjSKaAnIzoP48KzzmZg+4G09HVddZGrVFTZ2J9X\n6vANwfHCUMLhwjKHvQ0BbXYS2OFrKv1200raMyL8Gnp1iGBd3lcs27cMm7ExJCyOCbYAfrv9ZwIL\nD0JQe4ifBAnXQUSPOsVZWF7Il7u/5JPtn7D+8HpaSAtGRY9iQvfLGNFpOC38Apw+lzGG3OKK6gtf\nRGt/kmLDAJi/fT7/l/J/lFWVcXv87UzpNwU/HyfaSJLvgtXvwtSFEHN2nV5jXWjS9zBllVUsST3E\n3JR0lm7PwhgYeVY4VyVGc0G/jietW80vqWDFrhyW2et9txw4AkDrli1Iig1jhP2bQJ+ObfE5Sd1k\nc2CMIb+kokZ1xbFklJlbQlnlsSqLIH9fhnY7Vl3Tt1PDvTdf7fqOx35+hApbFWFF17NzbwwBfr4M\nibE+m8ExrSn13UHKwRWsOLCCTdmbsBkbLX1bEt8unqROSSR1TKJfRD/nkomblZRXkZFbzA97VzB/\n1xukl2zAnzBaF48j5+BAjpRauaRfhC83RaZwsPhzPjV5ZPq1oA0+XNh+CBMH30XfdgPO+MJrjGH1\nodXM3z6fr/d8TUllCd2DuzOhxwQu7X4pYQFhpzy2sKzS/s3GXtVl//vJsP89FZUf37eka3grfjc4\niisHR+PjV8Dflv+Nb/Z+Q++w3jwx4gn6hfc7fbBlhTDrHKgsh9t/hsCQM3qtdaVJ30NsOVDA3JXp\nLFiTSW5xBZ2CA/jd4Ch+lxhNdFirMzpXdmEZv+48dhE4WpoNaeXHsFjXlGbdoaiskozckpOULq2k\nfsRer35UcKBfdRVDtL1hMsreWNk1PAi/Bv4WVGWrYubamby24TV6h/Vmxm9mEN02mqKySvx8ffBv\ncfLnP1J+hNUHV7P8wHJW7F/B1tytALRq0YpBHQYxtONQ+oT3IbJ1JB2DOtLCx7PadTYe3sjLa17m\n530/Ex4Qzi0DbuHKnldWf2spSN9E1lcz6JTxOa1MCTtNJ1aFX0T+wCS2s4Zv0r+hrKqMnqE9mdhj\nIhfHXkxIwOkTYlZxFsk7klmQtoDdBbsJ8gtiXMw4JvaYSP+I/tV/5+WVNtZn5LHlwBHSc4vJcPgb\nyq3RSN/K37e6cTuqxt/Q1oPW/+uvO3PwEfhNz3ZcPSQaWm3guZS/k1Oaw5S+U7g9/nYCW5ym/SVj\nFbwxxmqruPKN+r3xTtKk70YFpRV8tm4f81amsy4jHz9fYWzfjlw1JJqzz4rA10Ulz9PVW4+w1zfX\nVm/dGMoqq9iXV3pCUj/auJhdo5tioJ/vcUk9yiGpR4e1oq0buyHmlObw5x//zPL9y5nYYyIPJT1E\nQAvnqxMc5ZbmsvLASlYcsL4J7MrfVf2Yr/jSMagjnVt3JrJ1ZPUtqk0Uka0jiQiMcL6BsZ625mzl\n5bUv833694S0DOGmuJu4uvfVxye9yjJ4YQCUFUC/CWTEXsG7GZ34eE0mhwvLad+mJZcmhBLWYRM/\n7v+c1OxU/Hz8OL/L+Uw8ayJDOw2tbvytsFWwNGMpn2z/hKWZS6kyVQxqP4iJPSYypusYWvm1ospm\n2JiZb28Yz2blrhxKKqwSu7+vD5Ghgcf/3Tgk97Ag/1r/H/ZkF/FhSgYfrcrgQEEp4UH+XDwwhCNB\nn7Ak4zOi20Qzffh0hnYaeuqT/PBP+O5pmPgaDLiq3p9DbTTpu8Hqvbn875c9LNq4n9IKG707tuGq\nxGguT4gkLKhhW/KNMew9rofI8T1URnS3vgkM7xZBx+C6JalTqbIZDhSUHv8V2p7Q03OLOVBQelx3\nQj9fITLk2D+kY2krOqwV4U78U7rD2kNruf+H+8kvy+eRoY8woccEl54/qziLnfk7rW6FR29HrJ9Z\nJcfPSeXv43/8BaGN/aLQ2rooBLcMrvd7uDNvJzPXzuSrPV/Rxq8NU/pN4dq+1xLkF3Tizqv/a9Vl\nX/eJ1VfdrqLKxrdbDvFhSjrfbc2iymZIignj3LgKcn1/YvGeReSX5dMpqBPjzxpPWWUZyTuSyS7N\nJiIwgsu6X8aEsybQpU1Xth06Ul3AWb4rmyOl1jfAHu1b26s6IxgQFUzHtgEuq9Krshl+3JbF3JXp\nLNl8kEqboVfMQUraziG3Yj8TzprA/Yn3E9zyJOMJbFXw1kVwKBVu+8nqZtqANOk3sh+3ZTHlrRW0\n9m/BpfGduToxmgFR9f/Hq6vT9UXvVt0HPYJh3cIIr6UPujGG7KLy6oRuNXodK7HvyyuhourY35EI\ndGwbcFz3P8ek3qFtgMu+7TQGYwzvb3mf51c+T8egjswYNYM+4X0aNYbSylL2Fe2rvgjUvOWX5R+3\nf5Bf0HHfEBwvDlGto2jld+qqxb0Fe3ll3Sss2rWIAN8Aru17Ldf3vf7kiQ2sXjX/GQotWsIflp5y\nsMHBglI+Xp3BhykZ7DpcROuWLbhoQASxXXaxNvdrftn/Cz7iw7lR5zLhrAlEtUxg+a786oJMjv0b\nYdfwVtVJfli3MNq3cW0h5lSyC8v4ZE0mc1emsz0rl6AO3+Ab+iNt/EJ4fMTDjO069sT/99zd8MrZ\n0LG/1bB7uq6s9aRJvxFl5BZzyUs/0aFNAB/dPtwjR0HabIbNBwpO2ge9d0erD/qwbuEYQ3UD19GS\nenpOSfVX56PCg/yPG+jj2KWvc0hAs+liWlRRxPRl01m8ezGjokfxzNnPnNAH3xMcKT/CvsJ9ZBRm\nnPTCUFJZctz+oS1Dj/uGcLQd4du937IgbQF+Pn5M6j2JG+JuIDQg9PRPvvUL+OAamPg6DPhdrbEa\nY1i5O5e5K9NZtGE/JRVV9OzQmoviWxESGMD6PVUs25HNgQKrT36n4IDqQkp9xzK4gjGGtel5zEtJ\n57PNK7FFzMM3YB+xgUO5e9CdJEb2JCTQ4dvQujnwyR/g/Mfg3AcaLC5N+o2ktKKKq2b9wq6sIpLv\nOpvYiJN89fVAp+qDflSbli1qJPVj1TBRoYEEtfSsRsaGsCNvB/d+fy97CvZwd8Ld3BB3Q6PVo7uS\nMYbcstzqi0FGYcZxVUf7ivZRabMKAH4+flzV6ypu7n8zEYERzj3BmxdCfjrcvQZ8z6zAc6S0gs/W\n7WdeSjpr0/OA48cyDO8eTkwjjmU4U8XllXy2LoNZ697ikG8y4mPvdFDVhgDaEezXgfaBnTgv7xfi\nsjcQcMEs+vQbR4Cf66t7Nek3kofmb+CDFXuZdd1gLujX0d3h1FlZZRUbM/Px97UaUYMD/c7oH63S\nVsnhksNEBEa4rddJflk+xhiX1Gd/sesLpi+bTmCLQP557j9J6tR81/6pslWRVZJFZmEm0W2iad+q\nvfMHp6+EN34LF/wdht9Rrzh2ZBVSWWXo2aFp9T47avneNL5M+4Xd+ekcKN5HbvkBim1Z2HzzEDlW\noDLGBx9bCK2kHSF+HegY1JmY4Gh6hXcloXN3ereLqtPza9JvBB+mpPOnj9Zz22+68+CFvd0dToMy\nxnC45PCJ9clHrJLjwaKDVJpKolpHcUf8HVwUe9Hph+K7UFZxFq9veJ0Pt31Iha3ihPrsoz1ejt5O\nV59dUVXBP1P+yQdbPiChfQLP/+b5M0uC3mbutbDrR7g31blRtV6otKKcDQf3siP1c/zW/4M1wX35\nNSCW/IpDlJIFvgXV+/pXRbPqxkV1eh5nk37z/47eQDbty+fRBRsZ0T2cB8Y2/uRKDaGwvJC9R/Ye\nl8yPJvd9hfsoqyo7bv+IwAg6t+7MgHYDiIqNIjwwnE/TPuXhnx7mtQ2vcUf8HYztOrbBqkRySnN4\na+NbzNkyhwpbBZefdTndgrtVx5x+JJ1f9//qVH12VOso2rZsy99X/J31Weu5vu/1/HHwH5vEoCm3\nyd5hTTtw9r2a8E8jwM+fIVFnMSTqHpBsrvj5Rbjmj9D7YgDySopYu38Xmw7tapRvOFrSr4P84gou\neXkpFZWGhXef7dIZGBtTcUUxqw+tZsX+FSw/sJwtOVuwGYd6ff821V0Aa3YL7NS600kHp9iMjW/3\nfsvMtTNJy0ujZ2hP7oy/k/Oiz3PZH3R+WT7vbHqH9za/R2lVKRfHXsxtA2+jS9suJ+xrjCGnNKf6\nwnW6+mywer08OeJJxsaMdUmszdrCe61J0/64Edp0cHc0TUNlObw+Ggoy4fZl0MZ1VcJavdNAbDbD\nTe+s5Ke0w8z9w3AGdamlZ4MHKasqY92hddWDgTZkbaDSVNLCpwUD2w0kqWMSvUJ7Edkmks6tO9er\nl0qVrYovd3/JK+teYU/BHvqF9+OuhLsY0XlEnZN/UUUR/0v9H+9seocjFUe4IOYC7hh4B91CutUr\nzqySLDKOZLC/aD8J7ROIalO3OlWvUpgFL8RZg44ue8nd0TQtWVth1m+g6wiY/BHUMgGcs7R6p4G8\n/F0a323N4snx/Tw+4VfYKth0eJOV5PevYM2hNZTbyvERH+LC45jSbwpJnZJIaJ9w+iHldeDr48vF\n3S7mgpgL+GzHZ8xaP4vbltxGQvsE7kq4iyEdhzh9rpLKEuZsmcObG98kryyP86LP4874O+kV1ssl\ncXYM6kjHoKbbCO8WK1+zpjkefpe7I2l62vWCC56Gz++HFbNh2G2N+vRa0j8DP2zLYupbKxg/sDP/\nujre43oYVNmq2Jq7lRX7rZL8qoOrKK4sBqBXaC+SOiUxtONQBnUYRBv/No0aW0VVBZ+kfcKs9bM4\nVHyIoZ2GMi1+GvHt4095TFlVGR9t+4jX1r9Gdmk2IyNHMi1+GnERcY0YuTpBeRH8Kw66DINJH7g7\nmqbJGGtsw47v4NbvoUPfep9Sq3dc7OgArI5tA5h/xwiPW+QkqziLKV9OIf1IOgCxwbEkdUxiaKeh\nJHZIrH2ATSMpqyrjw60f8tqG18gpzeGcyHO4M+HO42YurLBVsCBtAbPWzeJg8UGGdBzCXQl3kdA+\nwY2Rq2rLZ8MXf4IbvvT4pQE9WmEWvDLcWgjmlm+tEc314NKkLyLjgBex1sh93RjzbI3H/wWcZ7/b\nCmhvjAmxP1YFbLA/ttcYc9npnssTk76nD8CqsFVw8+Kb2ZyzmUeGPsLwzsM9vpthcUUxc7ZaVTb5\nZfmM7jKa2wbextacrbyy7hUyCzMZ2G4gdyXcdfpJrVTjqqqElwZB6/Zw09f1W99Rwbav4P3fwfBp\ncMEz9TqVy+r0RcQXmAmMATKAlSKSbIxJPbqPMeZeh/3vAhyLZCXGmFN/h28C/vpZKusz8pl93WCP\nS/gAL6x6gdWHVvPsOc9ycbeL3R2OU1r5teLGuBu5qudVvLv5Xf676b98s/cbAPqE9eGR0Y9wduTZ\nHleF5vU2J1urVl3wjCZ8V+g5FobcDL+8DGf9FrqfV/sx9eRMHUUSkGaM2QkgInOA8UDqKfafBEx3\nTXjuNy8lnQ9W7OX2Ud0Z64EjbhfvXsx/U//LpN6TmkzCd9TavzW3D7yd3/f+PQvSFhDVJorzo8/X\nZO+JjIFl/4aw7tDrIndH03yMecoa4LbgdqsbZ6tTLwjjCs70FYoE0h3uZ9i3nUBEugKxwLcOmwNE\nJEVEfhWRy+scqRtszMznMfsArPvHeN4ArJ15O3n858cZ2G4gf0r8k7vDqZfglsFM6TeF0V1Ga8L3\nVLt/gn1rYMS0Bp0t0uv4t4IrXoeiw/DZPdDA7ayubo28BvjIGOM4JWNXY0ymiHQDvhWRDcaYHY4H\nicitwK0AXbqcOMDGHfKLK7j9vVWEBfnz70kJHrcmbXFFMfd+fy8BLQJ4/jfP43eGE10pdcaW/Rta\nRcDASe6OpPnpNNBaSL2yxEr6DVjwcSbpZwLRDvej7NtO5hrgTscNxphM+8+dIvI9Vn3/jhr7zAZm\ng9WQ60zgDclmM/xx7hoO5Jcy7w/DPW7ErTGGx5c9zu6C3cweM1v7mKuGdzAVtn8F5z0Cfu6d2rjZ\nGjGtUZ7GmeLrSqCHiMSKiD9WYk+uuZOI9AZCgV8ctoWKSEv77xHASE7dFuAxjg7AevySviR44ACs\nd1PfZfHuxdydcLf2bFGNY9lL4NfKanRUTVqtJX1jTKWITAMWY3XZfNMYs0lEngRSjDFHLwDXAHPM\n8X1A+wCzxJpX1Ad41rHXjyf6YVsW/1qyjQkJkVw7rGGXN6uLVQdXMWPVDEZ3Gc2NcTe6OxzlDQr2\nwYYPIfHGBm9kVA3PqTp9Y8wiYFGNbY/XuP/ESY5bBvSvR3yNKiO3mHvmrKFXhzb8bUJ/j2tQzCrO\n4oEfHiCqTRRPjXzK4+JTzdSvr4Cpqvd8+cozeNaw0kZUWlFFZl6JtSRgTjHpucUsST1Ilc3w6rWD\nCfT3rN4JFbYKHvjhAYoqipg9ZnajT6OgvFRpAax6G/peDqEx7o5GuUCzTfqVVTb255dWr/Oa4bCo\nd3puMQcLjp8b3t/Xh6iwQF6alECMhw/A6hHaw93hKG+x6m0oK4CRd7s7EuUizSbp5xaV8+wXW6yF\nvHOL2Z9XSqXtWPOCj0Cn4ECiQgM5p0e74xbyjg4LJCLIjzc2vY60Eoxp51FVJ019AJZqoirLraqd\nmHOgs8571Fw0m6Tv38KHb7YcIjoskIToUC4dcHRBbyupdwoOxL/FqTsrzd0yl5lrZwIwIGIAdybc\nyfBOw92e/JvTACzVxGz8CI7sg8v+7e5IlAvpLJtYy+5d8skl9A3ry4WxFzJr/Sz2F+1ncIfBTIuf\nRmLHWucwahBFFUVM+nwS+WX5zL1krvbHV43HGHhlhPX77ct0np0mwNkJ1zxrmKmbvLj6RUoqSnh4\n6MNc0fMKFk5YyMNDH2ZvwV5uWHwDt351K+uz1jdqTMYYHv/5cfYU7OGf5/5TE75qXGlL4FAqjLhb\nE34z4/VJf33WeuZvn8+1fa+tXnbP39efSb0nsWjiIh5IfIAtOVuYvGgy076ZxubszY0S17up7/LV\nnq+4Z9A9JHVKapTnVKrazy9Cm84Qd4W7I1Eu5tVJv8pWxTPLn6F9YHtuG3jikmUBLQKY0m8KX1zx\nBXcn3M3qQ6u5auFV3Pf9fezI23GSM7qG4wCsG/rd0GDPo9RJ7VsDu5fCsNuhhb+7o1Eu5tVJ/+Pt\nH5OancoDQx4gyO/U3TSD/IK4ZcAtfHnFl9w28DaW7VvGhE8n8ODSB9lTsMelMekALOV2P/8bWraF\nwVPdHYlqAF6b9HNLc3lx9YskdUxiXMw4p45p69+WO+Pv5MuJX3JD3A18s+cbxi8Yz/Rl09lXuK/e\nMTkOwPrXqH/pACzV+HJ2QeoCK+EHtHV3NKoBNJsum2fqxdUvUlxRzENJD51xaTokIIR7B9/LdX2v\n440NbzB361ySdyRzRY8rjlvr9UytPLCS1YdW89w5z+kALHXm9q2F/HQI6QqhXSEg+MzP8et/QHyt\nqh3VLHll0t94eCPzt8/nur7XcVboWXU+T0RgBH9J+gtT+k3htfWv8fG2j5lr5tYrtuv6XsdF3XRV\nInWGKkrhv+OhNO/YtoAQCOliXQBCulrTKBy9IIR0OXGK5OIcWPM/GHAVtO3cqOGrxuN1Sd9mbDzz\n6zOEB4Zz+0DXlGY6BnXkseGPcc/geygqL6rzeXx9fD1+QXPloTZ/ZiX8S/9tlfDz9kDuHutn1lbY\n/jVUlh5/TOsO1kXg6IUhdw9UFMOIu9zzGlSj8LqkP3/7fDZmb+TZc56ltX9rl567rX9b2vprPahy\ngzX/tRJ4wnXgc5KmOpsNig4duxDk7oG83dbPjBWw6RNrJs1eF0H7Po0evmo8XpX080rzeGH1CyR2\nSOSiWK1CUc1Ezk5rYe3zHz15wgdre5uO1q3LSRbeqaqEgkxord80mzuvSvr/XvNvCssLeXjow9oV\nUjUfa94D8YH4yXU/h28Lq4pHNXte02Vz0+FNfLTtIyb1nqQ9Y1TzUVUJa9+Ds8Zo46tyilckfZux\n8bflfyMsIIw74nX1H9WMpC2BI/th0PXujkQ1EV6R9BekLWD94fXcn3i/DnhSzcuadyGoPfS8wN2R\nqCbCqaQvIuNEZKuIpInIgyd5/F8istZ+2yYieQ6PTRGR7fbbFFcG74z8snxeWPUCg9oP4pJulzT2\n0yvVcI4chK1fQPwk8PVzdzSqiai1IVdEfIGZwBggA1gpIsnGmNSj+xhj7nXY/y4gwf57GDAdSAQM\nsMp+bK5LX8VpvLTmJQrKC7TxVjU/6z6wulkmXOfuSFQT4kxJPwlIM8bsNMaUA3OA8afZfxLwgf33\nC4CvjTE59kT/NeDcRDcukJqdyryt87im9zX0CuvVWE+rVMMzxqra6TICIrRjgnKeM0k/Ekh3uJ9h\n33YCEekKxALfnsmxInKriKSISEpWVpYzcdfKZmw8s/wZQgNCtfFWNT97f4HsNBikpXx1ZlzdkHsN\n8JExpupMDjLGzDbGJBpjEtu1a+eSQD5N+5T1Weu5b/B9OkpWNT+r/2tNf9z3dF+6lTqRM0k/E4h2\nuB9l33Yy13CsaudMj3WZ/LJ8Xlj9AgntE7i0+6UN/XRKNa7SfNi0APpfCf6nXgdCqZNxJumvBHqI\nSKyI+GMl9uSaO4lIbyAU+MVh82JgrIiEikgoMNa+rUG9vOZl8sryeGToI/iIV/RKVd5kw0dQWaIN\nuKpOau29Y4ypFJFpWMnaF3jTGLNJRJ4EUowxRy8A1wBzjDHG4dgcEXkK68IB8KQxJse1L+F4W3K2\nMG/bPK7udbU23qrmafV/oUM5NDY3AAAfjElEQVR/6Jzg7khUE+TU3DvGmEXAohrbHq9x/4lTHPsm\n8GYd4zsjR6dNDmkZwrSEaY3xlEo1rv3rYf9auPAfoF2QVR00q7qPz3Z8xtqstfxx0B+18VY1T2ve\nBd+W0P937o5ENVHNJukXlBcwY9UMBrYbyPiztEeDaoYqSmD9XOhzKbQKc3c0qolqNlMrl1eVM7jD\nYG7pf4s23qrmafNCq+eOTq6m6qHZJP2IwAhmjJrh7jCUajir37HWuY05x92RqCZMi8RKNQU5O2H3\nUki49tSrYynlBP3rUaopWPO/+q+OpRSa9JXyfFWVsPZ9XR1LuYQmfaU8na6OpVxIk75Snk5Xx1Iu\npElfKU+mq2MpF9Okr5QnW/e+ro6lXEqTvlKeyhhYratjKdfSpK+Up9qzDHJ2aAOucilN+kp5qjXv\n6upYyuU06SvliY5bHauVu6NRzYgmfaU8ka6OpRqIJn2lPJGujqUaiCZ9pTzN0dWxBl2nq2Mpl3Mq\n6YvIOBHZKiJpIvLgKfa5SkRSRWSTiLzvsL1KRNbabycsqK6UqkFXx1INqNb59EXEF5gJjAEygJUi\nkmyMSXXYpwfwEDDSGJMrIu0dTlFijIl3cdxKNU9HV8fqe5mujqUahDMl/SQgzRiz0xhTDswBavYh\nuwWYaYzJBTDGHHJtmEp5iaOrY2kDrmogziT9SCDd4X6GfZujnkBPEflZRH4VkXEOjwWISIp9++X1\njFep5k1Xx1INzFXLJbYAegCjgCjgRxHpb4zJA7oaYzJFpBvwrYhsMMbscDxYRG4FbgXo0qWLi0JS\nqok5ujrW+Y/q6liqwTjzl5UJRDvcj7Jvc5QBJBtjKowxu4BtWBcBjDGZ9p87ge+BE/qgGWNmG2MS\njTGJ7dq1O+MXoZRLFB2GsiPue35dHUs1AmeS/kqgh4jEiog/cA1QsxfOAqxSPiISgVXds1NEQkWk\npcP2kUAqSnmavHR4aTA83wsW3Al7frEmPGtoFaWw8WP47+WwdAb0HKerY6kGVWv1jjGmUkSmAYsB\nX+BNY8wmEXkSSDHGJNsfGysiqUAV8CdjTLaIjABmiYgN6wLzrGOvH6U8gq0KPrkNbJXQbwKkLoC1\n/4PwHtZC5AMnQZsOrn3OAxusGTQ3zIOSXAjuAqMehKRbXfs8StUgpjFKM2cgMTHRpKSkuDsM5U1+\n+hcseQLGz7SSfFkhpH5q9Zff+wuIr7VqVcJ10GMs+NaxKawkDzZ8aFXj7F8Lvv7Q51LrvLG/0Xp8\nVS8issoYk1jbfq5qyFWqadq3Fr59BvpcdqwuvWVrSJhs3Q5vt5L/2g9g6yJo3QEGXmMlamfmuLfZ\nrMbZNf+DzclQWWpNr3DhP6zBV9oXXzUyLekr71VeDLPOhfJCuH3Z6RNwVQVs/9q6AGxbbK1m1WW4\nlfz7jrcuFI7yM6wLxdr/Qe5uaBkMA35n7d9Zxyoq19OSvlK1+epRyN4O1y2ovcTt6we9L7JuRw7A\nujnWBeDTO+CLP0PcRIi/Fo7st7bv+BaMDWLPhfMehT6XgF9g47wupU5DS/rKO239Ej64GoZPgwue\nqds5jIG9v1pJftMnUFFsbW8bCfG/t6qLwmJdF7NSp6ElfaVOpfAQfHondIiD0Y/X/Twi0HW4dbvw\nOdiyCFqFQ/fzwMfXdfEq5UKa9JV3McZK+GVHYOpCaNHSNedt2QYGXu2acynVgDTpK++y8nXY/hWM\new7a93F3NEo1Ou0YrLxH1lar8bb7aBj6B3dHo5RbaNJX3qGyHD6+GfyD4PL/6IpUymtp9Y7yDt89\nDQfWwzXvQ5uO7o5GKbfRkr5q/nb9CD//GwZNgd4XuzsapdxKk75q3kpyrcnUwrvDuL+7Oxql3E6r\nd1TzZQwsvBcKD8JNX1v1+Up5OS3pq+Zr3RxrpOyohyBykLujUcojaNJXzVPublj0J+gyAs6+193R\nKOUxNOmr5qeqEubfanXLnDhLp0RQyoHW6avm56cZkL4cJr4GIV3cHY1SHkVL+qp5yUiB75+FuCth\nwFXujkYpj6NJXzUfZYUw/xZrYfGL/8/d0SjlkZxK+iIyTkS2ikiaiDx4in2uEpFUEdkkIu87bJ8i\nItvttymuClyp41SWW90zc3bBhFchMMTdESnlkWqt0xcRX2AmMAbIAFaKSLIxJtVhnx7AQ8BIY0yu\niLS3bw8DpgOJgAFW2Y/Ndf1LUV4rfSUk3wVZm63umTFnuzsipTyWMyX9JCDNGLPTGFMOzAHG19jn\nFmDm0WRujDlk334B8LUxJsf+2NfAONeErrxeWSF88SC8MQbKCmDSXBh10i+iSik7Z3rvRALpDvcz\ngKE19ukJICI/A77AE8aYL09xbGSdo1XqqO1LrOqc/HQYcrO1AlZAW3dHpZTHc1WXzRZAD2AUEAX8\nKCL9nT1YRG4FbgXo0kW72KnTKMqGxQ/B+rkQ0RNu/BK6DHN3VEo1Gc4k/Uwg2uF+lH2bowxguTGm\nAtglItuwLgKZWBcCx2O/r/kExpjZwGywFkZ3MnblTYyBDR/Bl3+B0nw4989w7gOuW+5QKS/hTJ3+\nSqCHiMSKiD9wDZBcY58F2JO7iERgVffsBBYDY0UkVERCgbH2bUo5Ly8d3r8K5t8MoTHwhx/h/Ec0\n4StVB7WW9I0xlSIyDStZ+wJvGmM2iciTQIoxJpljyT0VqAL+ZIzJBhCRp7AuHABPGmNyGuKFqGbI\nZrPWtP3mr2BsMO5ZSLpVp1VQqh7EGM+qTUlMTDQpKSnuDkO526Et8Nnd1nQK3c+HS16A0K7ujkop\njyUiq4wxibXtp3PvKM9SWQ4//QuWPm/Nfz9hFgy4Wte0VcpFNOkrz5GRAp9OswZZxV1pVee0bufu\nqJRqVjTpK/crK4Rvn4blr1rz5kyaC710DJ9SDUGTvnIvHWSlVKPSpK/cQwdZKeUWmvRV4zpukFUB\n/OYvcM792udeqUaiSV81nrx0+Pw+2P4VRCbCZS9Bh77ujkopr6JJXzU8WxWsfEMHWSnlATTpq4Z1\naIs1133GCug+Gi75lw6yUsqNNOmrhlFZbi1Q/uPz0LINTJhtrVmrg6yUcitN+sr10ldA8t06yEop\nD6RJX7lOWSF88ySsmA1tI+H386DnBe6OSinlQJO+co3SfHjtfMjeAUm3WIOsWrZxd1RKqRo06SvX\n+PwByNkF130C3c9zdzRKqVNwZhEVpU5vw0ewYZ410EoTvlIeTZO+qp+8vbDwPogeao2sVUp5NE36\nqu5sVfDJbdaAqwmzwFdrC5XydPpfquru5xdgz89w+SsQFuvuaJRSTtCSvqqbzNXw3d+g7+UwcJK7\no1FKOcmppC8i40Rkq4ikiciDJ3l8qohkicha++1mh8eqHLYnuzJ45SblRTD/FmjdwZpWQUfZKtVk\n1Fq9IyK+wExgDJABrBSRZGNMao1d5xpjpp3kFCXGmPj6h6o8xuJHrP74U5KhVZi7o1FKnQFnSvpJ\nQJoxZqcxphyYA4xv2LCUx9qyCFa9BSPugthz3R2NUuoMOZP0I4F0h/sZ9m01XSEi60XkIxGJdtge\nICIpIvKriFx+sicQkVvt+6RkZWU5H71qXEcOQvI06Ngfzn/U3dEoperAVQ25nwExxpgBwNfAOw6P\ndTXGJAK/B14Qke41DzbGzDbGJBpjEtu104m5PJIx8OkdVn3+FW/oSldKNVHOJP1MwLHkHmXfVs0Y\nk22MKbPffR0Y7PBYpv3nTuB7IKEe8Sp3WfEapC2BsU9Du17ujkYpVUfOJP2VQA8RiRURf+Aa4Lhe\nOCLSyeHuZcBm+/ZQEWlp/z0CGAnUbABWnu7QFvj6MegxFobcXPv+SimPVWvvHWNMpYhMAxYDvsCb\nxphNIvIkkGKMSQbuFpHLgEogB5hqP7wPMEtEbFgXmGdP0utHebLKMvj4ZvBvDeNnavdMpZo4Mca4\nO4bjJCYmmpSUFHeHoY766lFY9hJMmgu9xrk7GqXUKYjIKnv76WnpiFx1aju/txJ+4o2a8JVqJjTp\nq5MrzoFPbofwHjD2GXdHo5RyEZ1wTZ3IGFj4Ryg6BJOWgH8rd0eklHIRLemrE619H1I/tQZgddYe\ntko1J5r01fFydsIXf4auZ8OIu90djVLKxTTpq2OqKmH+H0B8YcKr4OPr7oiUUi6mdfrqmB//CRkr\nrGkWQqJr318p1eRo0ldQdBi+fBA2fAgDrob+V7o7IqVUA9Gk782MgfXzrIRfdgRGPQRn3+fuqJRS\nDUiTvrfK2wsL77UmUYsaApe9BO37uDsqpVQD06TvbWxVsGI2fPOUdf/Cf1iTqGmjrVJeQZO+Nzm0\nGT6dBpkpcNYYuGQGhHRxd1RKqUakSd8bVJbB0v+DpTOgZRuY+Br0/53OmKmUF9Kk39ztXQ7Jd8Hh\nrdD/Khj3dwiKcHdUSik30aTfXJUdgW+etFa8ahsJkz+CHmPcHZVSys006TdH2xbDwvugIBOG/sGa\nQ6dlG3dHpZTyAJr0m5Oiw/DFX2DjR9CuN9z0FUQnuTsq1URVVFSQkZFBaWmpu0NRDgICAoiKisLP\nz69Ox2vSb0qMgcJDkLcHcvdA3m77T/v9gkxA7IOs7oUWLd0dsWrCMjIyaNOmDTExMYg2+nsEYwzZ\n2dlkZGQQGxtbp3M4lfRFZBzwItYaua8bY56t8fhU4J9Apn3Ty8aY1+2PTQEetW9/2hjzTp0i9RYl\nuccSed7e45N63l6oLDl+/6D2ENrVGmAVeqXVWNu+t3tiV81KaWmpJnwPIyKEh4eTlZVV53PUmvRF\nxBeYCYwBMoCVIpJ8kgXO5xpjptU4NgyYDiQCBlhlPza3zhE3V3t+gU/+YCV4Ry2DIbQLRPSwGmJD\nulpJPqSr1cdeFzhRDUgTvuep72fiTEk/CUgzxuy0P+EcYDxQM+mfzAXA18aYHPuxXwPjgA/qFm4z\nZAz8+h/46jErmY99+vjEHhji7giVajZiYmJISUkhIsJ7uy07k/QjgXSH+xnA0JPsd4WInAtsA+41\nxqSf4tjIOsba/JQWQPI0a5Wq3pfA5f+BgGB3R6WURzLGYIzBx0eXAakPV717nwExxpgBwNfAGdXb\ni8itIpIiIin1qatqUg6mwmvnweaFMOZJuPp/mvCVqmH37t306tWL66+/nri4OG666SYSExPp168f\n06dPr94vJiaG6dOnM2jQIPr378+WLVsAyM7OZuzYsfTr14+bb74ZY0z1MTNmzCAuLo64uDheeOGF\n6ufr3bs3U6dOpWfPnkyePJklS5YwcuRIevTowYoVKxr3DWgAzpT0MwHHFTWiONZgC4AxJtvh7uvA\nPxyOHVXj2O9rPoExZjYwGyAxMdHUfLzZWTfXWnjcvzVMSYaYs90dkVKn9dfPNpG6r8Cl5+zbuS3T\nL+1X637bt2/nnXfeYdiwYeTk5BAWFkZVVRWjR49m/fr1DBgwAICIiAhWr17Nf/7zH55//nlef/11\n/vrXv3L22Wfz+OOP8/nnn/PGG28AsGrVKt566y2WL1+OMYahQ4fym9/8htDQUNLS0vjwww958803\nGTJkCO+//z4//fQTycnJ/O1vf2PBggUufR8amzMl/ZVADxGJFRF/4Bog2XEHEenkcPcyYLP998XA\nWBEJFZFQYKx9m3eqLLMGTX1yK3SKh9uWasJXqhZdu3Zl2LBhAMybN49BgwaRkJDApk2bSE091rQ4\nceJEAAYPHszu3bsB+PHHH7n22msBuPjiiwkNDQXgp59+YsKECQQFBdG6dWsmTpzI0qVLAYiNjaV/\n//74+PjQr18/Ro8ejYjQv3//6vM2ZbWW9I0xlSIyDStZ+wJvGmM2iciTQIoxJhm4W0QuAyqBHGCq\n/dgcEXkK68IB8OTRRl2vk7cX5k2BfathxF0wejr41m1whVKNzZkSeUMJCgoCYNeuXTz//POsXLmS\n0NBQpk6detzAsZYtrXEpvr6+VFZW1vn5jp4HwMfHp/q+j49Pvc7rKZyq0zfGLDLG9DTGdDfGPGPf\n9rg94WOMecgY088YM9AYc54xZovDsW8aY86y395qmJfh4bYvgVnnQnaaVXc/9mlN+EqdoYKCAoKC\ngggODubgwYN88cUXtR5z7rnn8v777wPwxRdfkJtr9RY/55xzWLBgAcXFxRQVFfHJJ59wzjnnNGj8\nnkJH5DYkWxX88A/44Tlo3xeufhfCu7s7KqWapIEDB5KQkEDv3r2Jjo5m5MiRtR4zffp0Jk2aRL9+\n/RgxYgRduljrRwwaNIipU6eSlGRNU3LzzTeTkJDQLKpvaiOOrdmeIDEx0aSkpLg7jPoryob5N8OO\nb2HgJLh4hg6kUk3K5s2b6dNHl9D0RCf7bERklTEmsbZjtaTfEDJWwbzroegQXPICDJ6qC5YopTyC\nJn1XMgZWvg5fPgRtO1mzXHZOcHdUSilVTZO+q9hs8OkdsO4D6DEWJsyCVmHujkoppY6jSd9V1r5n\nJfxzHoDzHgEdKq6U8kCamVyhJBeWTIfooZrwlVIeTbOTK3z7jJX4L3peE75SyqNphqqv/esg5Q0Y\ncjN0GuDuaJRqVkaMGFHrPkuXLqVfv37Ex8ezefNm4uLiGiGypkuTfn3YbPD5AxAYZlXrKKVcatmy\nZbXu89577/HQQw+xdu1aAgMDGyGqpk2Tfn2s+wAyVsCYv+piJ0o1gNatWwPw/fffM2rUKK688kp6\n9+7N5MmTMcbw+uuvM2/ePB577DEmT5583LFvv/0206YdW8zvkksu4fvvvwfgq6++Yvjw4QwaNIjf\n/e53FBYWAqeeormwsJAbbriB/v37M2DAAD7++OPTnsfRqFGjuOeee4iPjycuLq56euacnBwuv/xy\nBgwYwLBhw1i/fj0AP/zwA/Hx8cTHx5OQkMCRI0dc+I5q7526K8mDrx+31qYd+Ht3R6NUw/riQTiw\nwbXn7NgfLny29v3s1qxZw6ZNm+jcuTMjR47k559/5uabb+ann37ikksu4corr3RqGoXDhw/z9NNP\ns2TJEoKCgnjuueeYMWMGjz/+OHDyKZqfeuopgoOD2bDBeg9yc3NrPY+j4uJi1q5dy48//siNN97I\nxo0bmT59OgkJCSxYsIBvv/2W66+/nrVr1/L8888zc+ZMRo4cSWFhIQEBAU6/R87QpF9X3/0NSnLg\noo+18VapRpCUlERUVBQA8fHx7N69m7PPPvOpyX/99VdSU1Or5+4pLy9n+PDh1Y87TtE8f/58AJYs\nWcKcOXOq9wkNDWXhwoWnPY+jSZMmAdYEcAUFBeTl5fHTTz9Vf2M4//zzyc7OpqCggJEjR3Lfffcx\nefJkJk6cWP2aXUWTfl0c2AArX4PEG6FzvLujUarhnUGJvKE4TnnszPTJLVq0wGazVd8/Og2zMYYx\nY8bwwQcnX6rb2SmaazuPo5qLmZ9ucfMHH3yQiy++mEWLFjFy5EgWL15M7969a30OZ2kR9UwZY2+8\nDYXzH3V3NEqpU4iJiWHt2rXYbDbS09Or69KHDRvGzz//TFpaGgBFRUVs27bttOcaM2YMM2fOrL6f\nm5t7RueZO3cuYC3eEhwcTHBwMOeccw7vvfceYLVZRERE0LZtW3bs2EH//v35y1/+wpAhQ6rbFVxF\nk/6ZWjcH0n+F3z5hJX6llEcaOXIksbGx9O3bl7vvvptBgwYB0K5dO95++20mTZrEgAEDGD58eK2J\n9dFHHyU3N5e4uDgGDhzId999d0bnCQgIICEhgdtuu616ycYnnniCVatWMWDAAB588EHeecdaWvyF\nF14gLi6OAQMG4Ofnx4UXXujCd0WnVj4zpfnwUiKEdIGbvta6fNWs6dTKrjFq1Cief/55EhNrnfXY\naTq1cmP57u9QlAWT52nCV0o1SZr0nXVgI6yYDYk36HTJSimnHR0b4CmcKq6KyDgR2SoiaSLy4Gn2\nu0JEjIgk2u/HiEiJiKy13151VeCNyhhY9AAEBMP5j7k7GqWUqrNaS/oi4gvMBMYAGcBKEUk2xqTW\n2K8NcA+wvMYpdhhjmna/xvXzYO8vcOmLOke+UqpJc6aknwSkGWN2GmPKgTnA+JPs9xTwHFDqwvjc\nr7QAvn4MOg+ChOvdHY1SStWLM0k/Ekh3uJ9h31ZNRAYB0caYz09yfKyIrBGRH0TknJM9gYjcKiIp\nIpKSlZXlbOyN4/tnofAQXKzTJiulmr56ZzER8QFmAPef5OH9QBdjTAJwH/C+iLStuZMxZrYxJtEY\nk9iuXbv6huQ6B1Nh+asweApEDnZ3NEp5neY8tfLUqVP56KOPGv15nUn6mUC0w/0o+7aj2gBxwPci\nshsYBiSLSKIxpswYkw1gjFkF7AB6uiLwBmcMLPoTBLSF0dPdHY1SXkmnVnY9Z5L+SqCHiMSKiD9w\nDZB89EFjTL4xJsIYE2OMiQF+BS4zxqSISDt7QzAi0g3oAex0+atoCBs+gj0/wejHtfFWKTdpDlMr\nv/baawwZMoSBAwdyxRVXUFxcXP3YkiVLSExMpGfPnixcuBCATZs2kZSURHx8PAMGDGD79u0uejct\ntfbeMcZUisg0YDHgC7xpjNkkIk8CKcaY5NMcfi7wpIhUADbgNmNMjisCb1ClBfDVo9ApHgZNcXc0\nSrndcyueY0uOa+eA6R3Wm78k/cXp/Zvq1MoTJ07klltuAazpHN544w3uuusuAHbv3s2KFSvYsWMH\n5513Hmlpabz66qvcc889TJ48mfLycqqqqpx+j5zh1OAsY8wiYFGNbSdOGm1tH+Xw+8fAx/WIzz1+\neA4KD8I174OPr7ujUUrRdKdW3rhxI48++ih5eXkUFhZywQUXVD921VVX4ePjQ48ePejWrRtbtmxh\n+PDhPPPMM2RkZDBx4kR69Ohxxq/xdHREbk2HtliNt4OugyhtvFUKOKMSeUNpqlMrT506lQULFjBw\n4EDefvvt40bonmzK5d///vcMHTqUzz//nIsuuohZs2Zx/vnnn/Y5zkTz6YNYVgj/6AazR8G8Kdaq\nVitfh+1L4PB2qHBi+MDRkbf+rWH0Ew0dsVKqAXnK1MpHjhyhU6dOVFRUVE+lfNSHH36IzWZjx44d\n7Ny5k169erFz5066devG3Xffzfjx46uXUXSV5lPSryqHvuMhdw8c3AhbF1nbHLXpBCFdIbSr9TOk\ny7Hf20ZC6gLYvRQu/j8ICnfP61BKuYTj1Mp9+vQ56dTKZWVlADz99NP07HnqjoWPPvood955J3Fx\ncfj6+jJ9+nQmTpzo1Hmeeuophg4dSrt27Rg6dOhxa9526dKFpKQkCgoKePXVVwkICGDevHm8++67\n+Pn50bFjRx5++GGXvi/Nd2plmw0KD1gXgbw9NX7uhYIMMMe++uHTAsQH2veBW77Tunzl9XRqZc+l\nUyufjI8PtO1s3bqeZN3KqgrIzzj+glCwH4bfqQlfKdVsNd+kXxtfPwiLtW5KKeUlmk9DrlJKqVpp\n0ldKnZKntfmp+n8mmvSVUicVEBBAdna2Jn4PYowhOzubgICAOp/De+v0lVKnFRUVRUZGBh433bmX\nCwgIqB6ZXBea9JVSJ+Xn50dsrHZ0aG60ekcppbyIJn2llPIimvSVUsqLeNw0DCKSBexxdxxuFAEc\ndncQbqSvX1+/vv666WqMqXW9WY9L+t5ORFKcmT+judLXr69fX3/Dvn6t3lFKKS+iSV8ppbyIJn3P\nM9vdAbiZvn7vpq+/gWmdvlJKeREt6SullBfRpO8mIhItIt+JSKqIbBKRe+zbw0TkaxHZbv8Z6u5Y\nG5KI+IrIGhFZaL8fKyLLRSRNROaKiL+7Y2woIhIiIh+JyBYR2Swiw73p8xeRe+1/+xtF5AMRCWju\nn7+IvCkih0Rko8O2k37mYvm3/b1YLyKDXBGDJn33qQTuN8b0BYYBd4pIX+BB4BtjTA/gG/v95uwe\nYLPD/eeAfxljzgJygZvcElXjeBH40hjTGxiI9T54xecvIpHA3UCiMSYO8AWuofl//m8D42psO9Vn\nfiHQw367FXjFJREYY/TmATfgU2AMsBXoZN/WCdjq7tga8DVH2f/IzwcWAoI1MKWF/fHhwGJ3x9lA\nrz0Y2IW9Xc1hu1d8/kAkkA6EYU38uBC4wBs+fyAG2FjbZw7MAiadbL/63LSk7wFEJAZIAJYDHYwx\n++0PHQA6uCmsxvAC8Gfg6Ar14UCeMabSfj8DKzk0R7FAFvCWvXrrdREJwks+f2NMJvA8sBfYD+QD\nq/Cez9/RqT7zoxfGo1zyfmjSdzMRaQ18DPzRGFPg+JixLu/NsnuViFwCHDLGrHJ3LG7SAhgEvGKM\nSQCKqFGV08w//1BgPNbFrzMQxInVHl6nMT5zTfpuJCJ+WAn/PWPMfPvmgyLSyf54J+CQu+JrYCOB\ny0RkNzAHq4rnRSBERI6u8xAFZLonvAaXAWQYY5bb73+EdRHwls//t8AuY0yWMaYCmI/1N+Etn7+j\nU33mmUC0w34ueT806buJiAjwBrDZGDPD4aFkYIr99ylYdf3NjjHmIWNMlDEmBqsB71tjzGTgO+BK\n+27N+fUfANJFpJd902ggFS/5/LGqdYaJSCv7/8LR1+8Vn38Np/rMk4Hr7b14hgH5DtVAdaaDs9xE\nRM4GlgIbOFan/TBWvf48oAvWbKNXGWNy3BJkIxGRUcADxphLRKQbVsk/DFgDXGuMKXNnfA1FROKB\n1wF/YCdwA1ZBzCs+fxH5K3A1Vk+2NcDNWHXWzfbzF5EPgFFYs2keBKYDCzjJZ26/GL6MVe1VDNxg\njEmpdwya9JVSynto9Y5SSnkRTfpKKeVFNOkrpZQX0aSvlFJeRJO+Ukp5EU36SinlRTTpK6WUF9Gk\nr5RSXuT/ATLkcUuVROiIAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Get prediction score when using increasing fraction of random training point\n", "top_train_inf = get_top_train_influence()\n", "\n", "train_infl_idx = [f[0] for f in top_train_inf]\n", "train_rand_idx = sorted(train_infl_idx, key=lambda x: random())\n", "train_abs_idx = [f[0] for f in sorted(top_train_inf, key=lambda x: abs(x[1]))]\n", "\n", "# Get scores for random training order\n", "scores_rand = get_scores(train_rand_idx)\n", "scores_infl = get_scores(train_infl_idx)\n", "scores_abs = get_scores(train_abs_idx)\n", "\n", "# Plot\n", "for scores, label in [\n", " (scores_rand, 'random'),\n", " (scores_infl, 'influence pos'),\n", " (scores_abs, 'influence abs') ]:\n", " x = [s[0] for s in scores]\n", " y = [s[1] for s in scores]\n", " plot(x, y, label)\n", "\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Domain mismatch" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "model = get_model(X_train[train_infl_idx[:120]], y_train[train_infl_idx[:120]])\n", "\n", "keep_issues = []\n", "for i, decision_v_actual in enumerate(zip(model.decision_function(X_test), y_test)):\n", " decision_score, label = decision_v_actual\n", " #print(decision_score, prediction)\n", " if (decision_score > 0 and label == 0 or\n", " decision_score < 0 and label == 1):\n", " keep_issues.append( (i,decision_score ) )\n", "\n", "most_wrong = sorted(keep_issues, key=lambda x : abs(x[1]), reverse=True)[:20]\n", "most_wrong_test_idx = [el[0] for el in most_wrong]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PrefixPclassSexAgeSibSpParchFareEmbarked
2223115.000000008.02922
41221133.0000001090.00002
27423129.699118007.75002
18613129.6991181015.50002
29911150.00000001247.52081
60201029.6991180042.40000
19823129.699118007.75002
15623116.000000007.73332
74801019.0000001053.10000
3111129.69911810146.52081
1423114.000000007.85420
78201029.0000000030.00000
25821135.00000000512.32921
19521158.00000000146.52081
27001029.6991180031.00000
2823129.699118007.87922
88632027.0000000013.00000
74221121.00000022262.37501
35823129.699118007.87922
22802018.0000000013.00000
\n", "
" ], "text/plain": [ " Prefix Pclass Sex Age SibSp Parch Fare Embarked\n", "22 2 3 1 15.000000 0 0 8.0292 2\n", "412 2 1 1 33.000000 1 0 90.0000 2\n", "274 2 3 1 29.699118 0 0 7.7500 2\n", "186 1 3 1 29.699118 1 0 15.5000 2\n", "299 1 1 1 50.000000 0 1 247.5208 1\n", "602 0 1 0 29.699118 0 0 42.4000 0\n", "198 2 3 1 29.699118 0 0 7.7500 2\n", "156 2 3 1 16.000000 0 0 7.7333 2\n", "748 0 1 0 19.000000 1 0 53.1000 0\n", "31 1 1 1 29.699118 1 0 146.5208 1\n", "14 2 3 1 14.000000 0 0 7.8542 0\n", "782 0 1 0 29.000000 0 0 30.0000 0\n", "258 2 1 1 35.000000 0 0 512.3292 1\n", "195 2 1 1 58.000000 0 0 146.5208 1\n", "270 0 1 0 29.699118 0 0 31.0000 0\n", "28 2 3 1 29.699118 0 0 7.8792 2\n", "886 3 2 0 27.000000 0 0 13.0000 0\n", "742 2 1 1 21.000000 2 2 262.3750 1\n", "358 2 3 1 29.699118 0 0 7.8792 2\n", "228 0 2 0 18.000000 0 0 13.0000 0" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "most_wrong_passengerId = np.array(test_idx)[most_wrong_test_idx]\n", "most_wrong_df = data_df[data_df.PassengerId.isin(most_wrong_passengerId)][columns]\n", "most_wrong_df" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{1, 2, 3}" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(most_wrong_df.Pclass.values.tolist())\n" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.35241301907968575" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(data_df.Sex.values) / len(data_df.Sex.values)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.37593984962406013" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opppp = [el[0] for el in sorted(keep_issues, key=lambda x : abs(x[1]))]\n", "wrrrr = data_df[data_df.PassengerId.isin(np.array(test_idx)[opppp])][columns]\n", "\n", "sum(wrrrr.Sex) / len(wrrrr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "get_top_train_influence" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }