From 450f980edddea921477c84cf58f55dd463253ee1 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 24 Nov 2021 11:54:18 +0300 Subject: [PATCH 01/10] correct Census sample Signed-off-by: Alexander Myskov --- .../Census/census_modin.ipynb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb index 95d79349fe..de4b3bab3d 100644 --- a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb +++ b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb @@ -90,10 +90,11 @@ }, "outputs": [], "source": [ - "#import modin.pandas as pd\n", - "os.environ[\"MODIN_ENGINE\"] = \"native\"\n", - "os.environ[\"MODIN_BACKEND\"] = \"omnisci\"\n", - "os.environ[\"MODIN_EXPERIMENTAL\"] = \"True\"\n", + "#import pandas as pd\n", + "import modin.config as cfg\n", + "cfg.Engine.put('native')\n", + "cfg.Backend.put('omnisci')\n", + "cfg.IsExperimental.put('True')\n", "import modin.pandas as pd" ] }, @@ -148,7 +149,7 @@ }, "outputs": [], "source": [ - "df = pd.read_csv('ipums_education2income_1970-2010.csv.gz', compression=\"gzip\", nrows=10000)" + "df = pd.read_csv('ipums_education2income_1970-2010.csv.gz')" ] }, { @@ -183,9 +184,9 @@ "df = df[keep_cols]\n", "\n", "# clean up samples with invalid income, education, etc.\n", - "df = df.query(\"INCTOT != 9999999\")\n", - "df = df.query(\"EDUC != -1\")\n", - "df = df.query(\"EDUCD != -1\")\n", + "df = df[df[\"INCTOT\"] != 9999999]\n", + "df = df[df[\"EDUC\"] != -1]\n", + "df = df[df[\"EDUCD\"] != -1]\n", "\n", "# normalize income for inflation\n", "df[\"INCTOT\"] = df[\"INCTOT\"] * df[\"CPI99\"]\n", From 415d66b29123266e78e3adc684ae6ef96f2bcd34 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 24 Nov 2021 11:56:48 +0300 Subject: [PATCH 02/10] correct getting started sample Signed-off-by: Alexander Myskov --- 
.../IntelModin_GettingStarted/IntelModin_GettingStarted.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/IntelModin_GettingStarted.ipynb b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/IntelModin_GettingStarted.ipynb index 777df8b8fa..42579f1f0f 100644 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/IntelModin_GettingStarted.ipynb +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/IntelModin_GettingStarted.ipynb @@ -58,7 +58,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We will also be importing **stock Pandas as pd** and **Modin as mpd to show differentiation**. You can see importing Modin is simple and **does not require any additional steps.**" + "We will also be importing **stock Pandas** and **Modin as pd to show differentiation**. You can see importing Modin is simple and **does not require any additional steps.**" ] }, { From 0507ba06d4ff8b9fa7fa0391ac51dd6830ae9859 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 1 Dec 2021 15:31:05 +0300 Subject: [PATCH 03/10] correct conda install command Signed-off-by: Alexander Myskov --- .../IntelModin_GettingStarted/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md index dbc9e7f21a..0b523d894d 100644 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md @@ -52,17 +52,16 @@ source activate intel-aikit-modin ### Activate conda environment Without Root Access (Optional) -By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` folder, which requires root privileges to manage it. 
If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command: +By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` folder, which requires root privileges to manage it. If you would like to bypass using root access to manage your conda environment, then you can install the Intel® Distribution of Modin* python environment with the following command: #### Linux + ``` -conda create --name user-intel-aikit-modin --clone intel-aikit-modin +conda create -y -n modin-conda-forge -c conda-forge --strict-channel-priority modin-all matplotlib ``` - Then activate your conda environment with the following command: - ``` -source activate user-intel-aikit-modin +conda activate modin-conda-forge ``` @@ -71,7 +70,7 @@ source activate user-intel-aikit-modin Launch Jupyter Notebook in the directory housing the code example: ``` -conda install jupyter nb_conda_kernels +conda install jupyter nb_conda_kernels -c conda-forge -y ``` #### View in Jupyter Notebook From 08b8d2debffb8bf7a46a000cba0dc265f77b09ad Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 1 Dec 2021 16:01:56 +0300 Subject: [PATCH 04/10] correct sample.json, modify conda install cmd Signed-off-by: Alexander Myskov --- .../IntelModin_GettingStarted/README.md | 2 +- .../IntelModin_GettingStarted/sample.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md index 0b523d894d..a140536901 100644 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md @@ -57,7 +57,7 @@ By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` #### Linux ``` -conda create -y -n modin-conda-forge -c conda-forge 
--strict-channel-priority modin-all matplotlib +conda create -y -n modin-conda-forge -c conda-forge modin-all matplotlib ``` Then activate your conda environment with the following command: ``` diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json index bc5ad6bdca..e960c533a0 100755 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json @@ -13,7 +13,9 @@ "env": ["source /opt/intel/oneapi/setvars.sh --force", "conda create -n aikit-modin-test -c intel -c conda-forge matplotlib runipy intel-aikit-modin", "source activate aikit-modin-test"], "id": "Intel_Modin_GS_py", "steps": [ - "runipy IntelModin_GettingStarted.ipynb" + "conda create -y -n modin-conda-forge -c conda-forge --strict-channel-priority modin-all matplotlib runipy", + "conda activate modin-conda-forge", + "runipy IntelModin_GettingStarted.ipynb" ] } ] From 73d608b2037c6e29619933a368dcc7f9cab1d109 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 1 Dec 2021 16:22:27 +0300 Subject: [PATCH 05/10] fix typo Signed-off-by: Alexander Myskov --- .../Getting-Started-Samples/IntelModin_GettingStarted/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md index a140536901..9c82d12495 100644 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md @@ -15,7 +15,7 @@ Intel Distribution of Modin* uses Ray or Dask to provide an effortless way to sp In this sample, you will run Intel Distribution of Modin*-accelerated Pandas functions and note the performance gain when compared to "stock" (aka 
standard) Pandas functions. ## Key Implementation Details -This Getting Started sample code is implemented for CPU using the Python language. The example assumes you have Pandas and MoIntel Distribution of Modin* installed inside a conda environment, similar to what is directed by the [Intel® oneAPI AI Analytics Toolkit](https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/conda/install-intel-ai-analytics-toolkit-via-conda.html). +This Getting Started sample code is implemented for CPU using the Python language. The example assumes you have Pandas and Intel Distribution of Modin* installed inside a conda environment, similar to what is directed by the [Intel® oneAPI AI Analytics Toolkit](https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/conda/install-intel-ai-analytics-toolkit-via-conda.html). 
## License From 30537f67e9da3e9fd269ce5ee2a05b9628419583 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Mon, 6 Dec 2021 13:46:01 +0300 Subject: [PATCH 06/10] disable warnings Signed-off-by: Alexander Myskov --- .../End-to-end-Workloads/Census/census_modin.ipynb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb index de4b3bab3d..c01c46d802 100644 --- a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb +++ b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb @@ -56,7 +56,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Import basic python modules" + "Import basic python modules and disable warnings to avoid output cluttering" ] }, { @@ -66,7 +66,10 @@ "outputs": [], "source": [ "import os\n", - "import numpy as np" + "import numpy as np\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" ] }, { From 68ff984b7230cf43dd8cece1e05cb5a72ea98584 Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 19 Jan 2022 15:23:49 +0300 Subject: [PATCH 07/10] specify configuration setting for newer modin versions Signed-off-by: Alexander Myskov --- .../Census/census_modin.ipynb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb index c01c46d802..f65cf76ce8 100644 --- a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb +++ b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb @@ -94,11 +94,19 @@ "outputs": [], "source": [ "#import pandas as pd\n", + "import modin.pandas as pd\n", + "\n", "import modin.config as cfg\n", - "cfg.Engine.put('native')\n", - "cfg.Backend.put('omnisci')\n", - "cfg.IsExperimental.put('True')\n", - "import modin.pandas as pd" + "from packaging import 
version\n", + "import modin\n", + "\n", + "# Since modin 0.12.0 OmniSci engine can be enabled by setting the single StorageFormat configuration\n", + "if version.parse(modin.__version__) <= version.parse('0.11.3'):\n", + " cfg.Engine.put('native')\n", + " cfg.Backend.put('omnisci')\n", + " cfg.IsExperimental.put('True')\n", + "else:\n", + " cfg.StorageFormat.put('omnisci')\n" ] }, { From 5e803168bd880a5ddd6bfdbb57d5f54cd1239eec Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Fri, 21 Jan 2022 15:58:45 +0300 Subject: [PATCH 08/10] correct configuration setting Signed-off-by: Alexander Myskov --- .../End-to-end-Workloads/Census/census_modin.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb index f65cf76ce8..746d53a8d4 100644 --- a/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb +++ b/AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb @@ -100,11 +100,11 @@ "from packaging import version\n", "import modin\n", "\n", - "# Since modin 0.12.0 OmniSci engine can be enabled by setting the single StorageFormat configuration\n", + "cfg.IsExperimental.put(\"True\")\n", + "cfg.Engine.put('native')\n", + "# Since modin 0.12.0 OmniSci engine activation process slightly changed\n", "if version.parse(modin.__version__) <= version.parse('0.11.3'):\n", - " cfg.Engine.put('native')\n", " cfg.Backend.put('omnisci')\n", - " cfg.IsExperimental.put('True')\n", "else:\n", " cfg.StorageFormat.put('omnisci')\n" ] From f3bb7ae85284852ec1c6a3de0032f73564db75ed Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Fri, 21 Jan 2022 17:57:49 +0300 Subject: [PATCH 09/10] correct instalation instructions Signed-off-by: Alexander Myskov --- .../IntelModin_GettingStarted/README.md | 3 ++- .../IntelModin_GettingStarted/sample.json | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md index 9c82d12495..0915e2de6a 100644 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/README.md @@ -57,7 +57,8 @@ By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` #### Linux ``` -conda create -y -n modin-conda-forge -c conda-forge modin-all matplotlib +conda create -y -n modin-conda-forge -c conda-forge modin-all +conda install -y -n modin-conda-forge -c conda-forge matplotlib ``` Then activate your conda environment with the following command: ``` diff --git a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json index e960c533a0..a919ad25db 100755 --- a/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json +++ b/AI-and-Analytics/Getting-Started-Samples/IntelModin_GettingStarted/sample.json @@ -13,7 +13,8 @@ "env": ["source /opt/intel/oneapi/setvars.sh --force", "conda create -n aikit-modin-test -c intel -c conda-forge matplotlib runipy intel-aikit-modin", "source activate aikit-modin-test"], "id": "Intel_Modin_GS_py", "steps": [ - "conda create -y -n modin-conda-forge -c conda-forge --strict-channel-priority modin-all matplotlib runipy", + "conda create -y -n modin-conda-forge -c conda-forge modin-all", + "conda install -y -n modin-conda-forge -c conda-forge matplotlib runipy", "conda activate modin-conda-forge", "runipy IntelModin_GettingStarted.ipynb" ] From 6682d8d2435a1b95b5d97ac4f7003157c194959d Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Fri, 18 Feb 2022 12:58:58 +0300 Subject: [PATCH 10/10] correct CI for modin sample Signed-off-by: Alexander Myskov --- .../End-to-end-Workloads/Census/sample.json | 23 +++++++++++-------- 1 file 
changed, 13 insertions(+), 10 deletions(-) diff --git a/AI-and-Analytics/End-to-end-Workloads/Census/sample.json b/AI-and-Analytics/End-to-end-Workloads/Census/sample.json index 49f3a324a7..995ed5217e 100755 --- a/AI-and-Analytics/End-to-end-Workloads/Census/sample.json +++ b/AI-and-Analytics/End-to-end-Workloads/Census/sample.json @@ -10,16 +10,19 @@ "targetDevice": ["CPU"], "ciTests": { "linux": [ - { - "env": ["source activate base"], - "steps": [ - "conda create -y -n intel-aikit-modin intel-aikit-modin -c intel", - "conda activate intel-aikit-modin", - "conda install -y runipy", - "pip install opencensus", - "runipy census_modin.ipynb" - ] - } + { + "env": [], + "id": "Intel_Modin_E2E_py", + "steps": [ + "set -e # Terminate the script on first error", + "source $(conda info --base)/etc/profile.d/conda.sh # Working around conda's inability to activate environments inside a bash script: https://github.com/conda/conda/issues/7980", + "conda create -y -n intel-aikit-modin intel-aikit-modin -c intel", + "conda activate intel-aikit-modin", + "conda install -y jupyter # Installing 'jupyter' for extended abilities to execute the notebook", + "pip install opencensus # Installing 'opencensus' for extended abilities to execute the notebook", + "jupyter nbconvert --to notebook --execute census_modin.ipynb" + ] + } ] } }