CIRAIG
diff --git a/‎Data/IOIC_sectors.json‎
Lines changed: 1 addition & 0 deletions b/‎Data/IOIC_sectors.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Data/IW_2.0_flows.json‎
Lines changed: 1 addition & 0 deletions b/‎Data/IW_2.0_flows.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Data/exiobase_sectors.json‎
Lines changed: 1 addition & 0 deletions b/‎Data/exiobase_sectors.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 17 additions & 0 deletions b/‎README.md‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎demo.ipynb‎
Lines changed: 259 additions & 0 deletions b/‎demo.ipynb‎
Lines changed: 259 additions & 0 deletions
@@ -0,0 +1,17 @@
+## Installation
+Install the required module with pip:
+~~~
+pip install -U sentence-transformers
+~~~
+
+Alternatively, install the required module with conda:
+~~~
+conda install -c conda-forge sentence-transformers
+~~~
+
+## Getting started
+Check the demo.ipynb notebook for a general demo.
+Each of the other notebooks is tailored for mapping with a specific database (openIO-Canada, IMPACT World+ or exiobase).
+
+## Credit
+This module reuses the great work of the sentence-transformers project: https://github.com/UKPLab/sentence-transformers
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "32c6bfd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import pandas as pd\n",
+    "\n",
+    "# sentence_transformers from here: https://github.com/UKPLab/sentence-transformers\n",
+    "# just pip install it\n",
+    "from sentence_transformers import (SentenceTransformer, util)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c85d2644",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pick the pretrained sentence-embedding model used for the matching.\n",
+    "# The available pretrained models are listed here: https://www.sbert.net/docs/pretrained_models.html\n",
+    "MODEL_NAME = 'all-MiniLM-L6-v2'\n",
+    "model = SentenceTransformer(MODEL_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "dba50e72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the list of sectors/names used as the reference for the matching.\n",
+    "# The path is relative to the folder containing this notebook (the repository root),\n",
+    "# so the demo runs on any machine instead of relying on one user's local folders.\n",
+    "with open('Data/IOIC_sectors.json', 'r', encoding='utf-8') as f:\n",
+    "    IOIC_sectors = json.load(f)\n",
+    "# Encode every reference name into an embedding with the machine learning model.\n",
+    "IOIC_embeddings = model.encode(IOIC_sectors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "efa5a558",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Names/products that should be matched against the reference list.\n",
+    "products = [\n",
+    "    'ADPE System Configuration',\n",
+    "    'Geophysical Instruments',\n",
+    "]\n",
+    "# Encode those names/products into embeddings with the same model.\n",
+    "products_embeddings = model.encode(products)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "60fc32c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cosine similarity of every name/product to-be-matched (rows) against every\n",
+    "# entry of the reference list (columns).\n",
+    "scores = util.pytorch_cos_sim(products_embeddings, IOIC_embeddings)\n",
+    "# Rank the reference entries of each row from most to least similar, keeping both\n",
+    "# the sorted similarity values and the positions they came from.\n",
+    "sorted_scores, indices = scores.sort(descending=True, dim=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e3f4ea35",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the number of similarities per name/product to-be-matched that will be provided\n",
+    "# you can see this as the number of attempts the algorithm makes at matching a product to the reference\n",
+    "number_of_matches = 5\n",
+    "# never ask for more matches than there are reference entries (that would raise an IndexError)\n",
+    "n_best = min(number_of_matches, len(IOIC_sectors))\n",
+    "\n",
+    "# Collect one record per (product, rank) pair and build the dataframe in a single call:\n",
+    "# growing a dataframe with pd.concat inside a loop copies it at every iteration, and the\n",
+    "# transposed result ends up with an object-dtype 'similarity' column instead of floats.\n",
+    "records = [\n",
+    "    {\n",
+    "        'product': product,\n",
+    "        'order': rank + 1,\n",
+    "        # .item() turns the 0-d tensors into a plain Python int / float\n",
+    "        'sector': IOIC_sectors[indices[i][rank].item()],\n",
+    "        'similarity': sorted_scores[i][rank].item(),\n",
+    "    }\n",
+    "    for i, product in enumerate(products)\n",
+    "    for rank in range(n_best)\n",
+    "]\n",
+    "\n",
+    "# store data in a nice dataframe, indexed by product and rank of the match\n",
+    "# (explicit columns keep this working even when there is nothing to match)\n",
+    "df_results = pd.DataFrame(\n",
+    "    records, columns=['product', 'order', 'sector', 'similarity']\n",
+    ").set_index(['product', 'order'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a596ead3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>sector</th>\n",
+       "      <th>similarity</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>product</th>\n",
+       "      <th>order</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">ADPE System Configuration</th>\n",
+       "      <th>1</th>\n",
+       "      <td>Office administrative services</td>\n",
+       "      <td>0.254899</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Computer systems design and related services (...</td>\n",
+       "      <td>0.237132</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Custom software design and development services</td>\n",
+       "      <td>0.226263</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Advertising, public relations, and related ser...</td>\n",
+       "      <td>0.223295</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Facilities and other support services</td>\n",
+       "      <td>0.201962</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Geophysical Instruments</th>\n",
+       "      <th>1</th>\n",
+       "      <td>Measuring, control and scientific instruments</td>\n",
+       "      <td>0.544412</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Navigational and guidance instruments</td>\n",
+       "      <td>0.40421</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Other civil engineering works</td>\n",
+       "      <td>0.386468</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Other professional, scientific and technical s...</td>\n",
+       "      <td>0.353572</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Other electrical equipment and components</td>\n",
+       "      <td>0.315113</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                                            sector  \\\n",
+       "product                   order                                                      \n",
+       "ADPE System Configuration 1                         Office administrative services   \n",
+       "                          2      Computer systems design and related services (...   \n",
+       "                          3        Custom software design and development services   \n",
+       "                          4      Advertising, public relations, and related ser...   \n",
+       "                          5                  Facilities and other support services   \n",
+       "Geophysical Instruments   1          Measuring, control and scientific instruments   \n",
+       "                          2                  Navigational and guidance instruments   \n",
+       "                          3                          Other civil engineering works   \n",
+       "                          4      Other professional, scientific and technical s...   \n",
+       "                          5              Other electrical equipment and components   \n",
+       "\n",
+       "                                similarity  \n",
+       "product                   order             \n",
+       "ADPE System Configuration 1       0.254899  \n",
+       "                          2       0.237132  \n",
+       "                          3       0.226263  \n",
+       "                          4       0.223295  \n",
+       "                          5       0.201962  \n",
+       "Geophysical Instruments   1       0.544412  \n",
+       "                          2        0.40421  \n",
+       "                          3       0.386468  \n",
+       "                          4       0.353572  \n",
+       "                          5       0.315113  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "485c61a8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}