Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 42 additions & 47 deletions lime.ipynb → brick.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMnPEht5MQJF/hLpcLpa95R",
"authorship_tag": "ABX9TyP7xVaASTvQGBUdHC6gHR/+",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -55,75 +55,70 @@
{
"cell_type": "code",
"source": [
"# Necessary imports\n",
"# Import necessary libraries for BRICK or collaborative filtering\n",
"import pandas as pd\n",
"from lime.lime_tabular import LimeTabularExplainer\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
"from surprise import Dataset, Reader, SVD\n",
"from surprise.model_selection import train_test_split\n",
"import numpy as np\n",
"\n",
"# Train a Random Forest Regressor\n",
"model = RandomForestRegressor(n_estimators=100, random_state=42)\n",
"model.fit(X_train, y_train)\n",
"# Load your data into a DataFrame (df should have 'user_id', 'business_id', and 'rating' columns)\n",
"# df = pd.read_csv('your_data.csv') # Example of loading your data\n",
"\n",
"# Evaluate the model\n",
"y_pred = model.predict(X_test)\n",
"# Assuming the DataFrame df contains 'user_id', 'business_id', and 'rating'\n",
"reader = Reader(rating_scale=(1, 5)) # Assuming ratings are on a scale of 1 to 5\n",
"data = Dataset.load_from_df(df[['user_id', 'business_id', 'rating']], reader)\n",
"\n",
"# Regression Metrics\n",
"mse = mean_squared_error(y_test, y_pred)\n",
"rmse = mean_squared_error(y_test, y_pred, squared=False) # Root Mean Squared Error\n",
"mae = mean_absolute_error(y_test, y_pred)\n",
"r2 = r2_score(y_test, y_pred)\n",
"# Split the data into training and test sets\n",
"trainset, testset = train_test_split(data, test_size=0.2)\n",
"\n",
"# Train the BRICK recommendation model (using SVD here as an example)\n",
"model = SVD()\n",
"model.fit(trainset)\n",
"\n",
"# Predict ratings for the test set\n",
"y_pred = model.test(testset)\n",
"\n",
"# Extract predictions and true ratings\n",
"y_true = [true_r for (_, _, true_r) in testset]\n",
"y_pred_ratings = [pred.est for (uid, iid, pred) in y_pred]\n",
"\n",
"# Calculate evaluation metrics\n",
"mse = mean_squared_error(y_true, y_pred_ratings)\n",
"rmse = mean_squared_error(y_true, y_pred_ratings, squared=False)\n",
"mae = mean_absolute_error(y_true, y_pred_ratings)\n",
"r2 = r2_score(y_true, y_pred_ratings)\n",
"\n",
"print(f\"Mean Squared Error (MSE): {mse}\")\n",
"print(f\"Root Mean Squared Error (RMSE): {rmse}\")\n",
"print(f\"Mean Absolute Error (MAE): {mae}\")\n",
"print(f\"R-squared (R2): {r2}\")\n",
"\n",
"# LIME interpretation setup\n",
"explainer = LimeTabularExplainer(\n",
" training_data=X_train.values,\n",
" feature_names=feature_columns,\n",
" mode='regression'\n",
")\n",
"\n",
"# Function to explain a single prediction\n",
"def explain_prediction(instance):\n",
" exp = explainer.explain_instance(\n",
" data_row=instance,\n",
" predict_fn=model.predict\n",
" )\n",
" return exp\n",
"\n",
"# Example usage with a specific instance from X_test\n",
"instance_index = 0 # Change this index for different instances\n",
"explanation = explain_prediction(X_test.values[instance_index])\n",
"\n",
"# Show the explanation\n",
"print(\"LIME Explanation for instance:\")\n",
"explanation.show_in_notebook(show_table=True) # If you're using a Jupyter Notebook\n",
"# Or you can print the explanation details\n",
"print(explanation.as_list())\n",
"\n",
"def predict_success_for_business_ids(business_ids, model, features_df, feature_columns):\n",
" # Filter the dataset for the provided business IDs\n",
" # Predict ratings for the given business IDs\n",
" input_data = features_df[features_df[\"business_id\"].isin(business_ids)]\n",
" X_input = input_data[feature_columns]\n",
" success_probabilities = []\n",
"\n",
" # Predict success probability\n",
" success_probabilities = model.predict(X_input)\n",
" for business_id in business_ids:\n",
" # Use the recommendation model to predict a rating for the business_id\n",
" pred = model.predict(user_id, business_id) # Specify a user_id as needed\n",
" success_probabilities.append(pred.est)\n",
"\n",
" # Create a DataFrame to display results\n",
" result_df = pd.DataFrame({\n",
" \"business_id\": input_data[\"business_id\"],\n",
" \"success_probability\": success_probabilities * 10000\n",
" \"business_id\": business_ids,\n",
" \"success_probability\": success_probabilities\n",
" })\n",
"\n",
" return result_df\n",
"\n",
"# Example usage\n",
"business_ids_list = df['business_id'].tolist() # Create a list of all business ids\n",
"business_ids_to_predict = business_ids_list # Use this for prediction\n",
"success_probabilities = predict_success_for_business_ids(business_ids_to_predict, model, df, feature_columns)\n",
"\n",
"# Specify a user_id for the predictions\n",
"user_id = 'your_user_id' # Replace with actual user ID\n",
"success_probabilities = predict_success_for_business_ids(business_ids_to_predict, model, df, ['business_id'])\n",
"\n",
"print(\"Predicted Success Probabilities:\")\n",
"print(success_probabilities)\n",
Expand All @@ -146,4 +141,4 @@
"outputs": []
}
]
}
}
144 changes: 144 additions & 0 deletions bricks.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyP7xVaASTvQGBUdHC6gHR/+",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/karthikab5/BizRecProject/blob/dev/bricks.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_u28rTWlh3fy",
"outputId": "001c88db-9ab4-4cee-ee26-ce62b3b44b96"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"# prompt: code to drive mount\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"source": [
"# Import necessary libraries for BRICK or collaborative filtering\n",
"import pandas as pd\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
"from surprise import Dataset, Reader, SVD\n",
"from surprise.model_selection import train_test_split\n",
"import numpy as np\n",
"\n",
"# Load your data into a DataFrame (df should have 'user_id', 'business_id', and 'rating' columns)\n",
"# df = pd.read_csv('your_data.csv') # Example of loading your data\n",
"\n",
"# Assuming the DataFrame df contains 'user_id', 'business_id', and 'rating'\n",
"reader = Reader(rating_scale=(1, 5)) # Assuming ratings are on a scale of 1 to 5\n",
"data = Dataset.load_from_df(df[['user_id', 'business_id', 'rating']], reader)\n",
"\n",
"# Split the data into training and test sets\n",
"trainset, testset = train_test_split(data, test_size=0.2)\n",
"\n",
"# Train the BRICK recommendation model (using SVD here as an example)\n",
"model = SVD()\n",
"model.fit(trainset)\n",
"\n",
"# Predict ratings for the test set\n",
"y_pred = model.test(testset)\n",
"\n",
"# Extract predictions and true ratings\n",
"y_true = [true_r for (_, _, true_r) in testset]\n",
"y_pred_ratings = [pred.est for (uid, iid, pred) in y_pred]\n",
"\n",
"# Calculate evaluation metrics\n",
"mse = mean_squared_error(y_true, y_pred_ratings)\n",
"rmse = mean_squared_error(y_true, y_pred_ratings, squared=False)\n",
"mae = mean_absolute_error(y_true, y_pred_ratings)\n",
"r2 = r2_score(y_true, y_pred_ratings)\n",
"\n",
"print(f\"Mean Squared Error (MSE): {mse}\")\n",
"print(f\"Root Mean Squared Error (RMSE): {rmse}\")\n",
"print(f\"Mean Absolute Error (MAE): {mae}\")\n",
"print(f\"R-squared (R2): {r2}\")\n",
"\n",
"def predict_success_for_business_ids(business_ids, model, features_df, feature_columns):\n",
" # Predict ratings for the given business IDs\n",
" input_data = features_df[features_df[\"business_id\"].isin(business_ids)]\n",
" success_probabilities = []\n",
"\n",
" for business_id in business_ids:\n",
" # Use the recommendation model to predict a rating for the business_id\n",
" pred = model.predict(user_id, business_id) # Specify a user_id as needed\n",
" success_probabilities.append(pred.est)\n",
"\n",
" # Create a DataFrame to display results\n",
" result_df = pd.DataFrame({\n",
" \"business_id\": business_ids,\n",
" \"success_probability\": success_probabilities\n",
" })\n",
"\n",
" return result_df\n",
"\n",
"# Example usage\n",
"business_ids_list = df['business_id'].tolist() # Create a list of all business ids\n",
"business_ids_to_predict = business_ids_list # Use this for prediction\n",
"\n",
"# Specify a user_id for the predictions\n",
"user_id = 'your_user_id' # Replace with actual user ID\n",
"success_probabilities = predict_success_for_business_ids(business_ids_to_predict, model, df, ['business_id'])\n",
"\n",
"print(\"Predicted Success Probabilities:\")\n",
"print(success_probabilities)\n",
"\n",
"# Substitute a variable instead of directly giving business ID in SQL\n",
"business_df.createOrReplaceTempView(\"business_temp\")\n",
"review_df.createOrReplaceTempView(\"review_temp\")\n",
"checkin_df.createOrReplaceTempView(\"checkin_temp\")\n",
"business_id_to_search = 'SuSEmi52lP8gquHV0XIB9g' # Replace with your variable\n",
"\n",
"# Using SQL queries\n",
"spark.sql(f\"SELECT * FROM business_temp WHERE business_id = '{business_id_to_search}'\").show()\n",
"spark.sql(f\"SELECT * FROM review_temp WHERE business_id = '{business_id_to_search}'\").show()\n",
"spark.sql(f\"SELECT * FROM checkin_temp WHERE business_id = '{business_id_to_search}'\").show()\n"
],
"metadata": {
"id": "1aNFkM6Th-T-"
},
"execution_count": null,
"outputs": []
}
]
}