diff --git a/main/COMO.ipynb b/main/COMO.ipynb
index d361f9b2..bbaeb152 100644
--- a/main/COMO.ipynb
+++ b/main/COMO.ipynb
@@ -2,7 +2,11 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
    "source": [
     "# COMO: Constraint-based Optomization of Metabolic Objectives\n",
     "\n",
@@ -56,7 +60,11 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
    "source": [
     "# Step 1: Data Preprocessing and Analysis\n",
     "\n",
@@ -223,6 +231,35 @@
     "</table>"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-09T22:10:43.421233Z",
+     "start_time": "2024-12-09T22:10:43.418100Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "from como.rnaseq_preprocess import rnaseq_preprocess\n",
+    "from como.types import RNAPrepMethod\n",
+    "\n",
+    "taxon_id = 9606\n",
+    "context_names = [\"naiveB\"]\n",
+    "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n",
+    "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n",
+    "trna_matrix_filepath = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n",
+    "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n",
+    "\n",
+    "# No single-cell data is provided by default; COMO accepts single-cell data in CSV or h5ad format\n",
+    "# If you are using single-cell data, adjust the following lines accordingly\n",
+    "scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.csv\") for context in context_names]\n",
+    "# scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.h5ad\") for context in context_names]\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -236,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-12-07T03:30:27.253112Z",
@@ -248,50 +285,24 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001B[32m2024-12-06 23:12:10\u001B[0m | \u001B[1mINFO    \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m629\u001B[0m - \u001B[1mTEST\u001B[0m\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Starting...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n",
-      "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n",
-      "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[1mINFO    \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n",
-      "\u001B[32m2024-12-06 23:13:04\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n"
+      "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n",
+      "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n",
+      "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[1mINFO    \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n",
+      "\u001B[32m2024-12-09 16:24:13\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n"
      ]
     }
    ],
    "source": [
-    "from pathlib import Path\n",
-    "\n",
-    "from como.rnaseq_preprocess import rnaseq_preprocess\n",
-    "from como.types import RNAPrepMethod\n",
-    "\n",
-    "context_names = [\"naiveB\"]\n",
-    "output_gene_info_filepaths = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n",
-    "como_context_dirs = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n",
-    "output_trna_filepaths = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n",
-    "output_polya_filepaths = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n",
-    "\n",
-    "\n",
     "for i in range(len(context_names)):\n",
     "    await rnaseq_preprocess(\n",
     "        context_name=context_names[i],\n",
-    "        taxon=9606,\n",
-    "        output_gene_info_filepath=output_gene_info_filepaths[i],\n",
-    "        como_context_dir=como_context_dirs[i],\n",
+    "        taxon=taxon_id,\n",
+    "        output_gene_info_filepath=gene_info_filepath[i],\n",
+    "        como_context_dir=como_context_dir[i],\n",
     "        output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n",
-    "        output_trna_count_matrix_filepath=output_trna_filepaths[i],\n",
-    "        output_polya_config_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n",
-    "        output_polya_count_matrix_filepath=output_polya_filepaths[i],\n",
+    "        output_trna_count_matrix_filepath=trna_matrix_filepath[i],\n",
+    "        output_mrna_config_filepath=Path(\"./data/config_sheets/polya_config.xlsx\"),\n",
+    "        output_mrna_count_matrix_filepath=polya_matrix_filepath[i],\n",
     "        cache=True,\n",
     "        log_level=\"INFO\",\n",
     "    )"
@@ -369,39 +380,93 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-09T22:13:42.060657Z",
+     "start_time": "2024-12-09T22:13:41.740347Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001B[32m2024-12-09 16:18:43.958\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36mrnaseq_gen\u001B[0m:\u001B[36m805\u001B[0m - \u001B[34m\u001B[1mStarting 'naiveB'\u001B[0m\n",
+      "\u001B[32m2024-12-09 16:18:43.961\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36m_read_counts\u001B[0m:\u001B[36m175\u001B[0m - \u001B[34m\u001B[1mReading CSV file at 'data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "       entrez_gene_id  expressed  high\n",
+      "0                7105          0     0\n",
+      "1               64102          0     0\n",
+      "2                8813          0     0\n",
+      "3               57147          0     0\n",
+      "4               55732          0     0\n",
+      "...               ...        ...   ...\n",
+      "34396       124901321          0     0\n",
+      "34397       124902403          0     0\n",
+      "34398       101929614          0     0\n",
+      "34399       107984888          0     0\n",
+      "34400       124900697          0     0\n",
+      "\n",
+      "[34401 rows x 3 columns]\n"
+     ]
+    },
+    {
+     "ename": "KeyError",
+     "evalue": "'ensembl_gene_id'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mKeyError\u001B[0m                                  Traceback (most recent call last)",
+      "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m   3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3805\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m   3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n",
+      "File \u001B[0;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n",
+      "File \u001B[0;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n",
+      "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n",
+      "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n",
+      "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001B[0;31mKeyError\u001B[0m                                  Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[4], line 11\u001B[0m\n\u001B[1;32m      8\u001B[0m cutoff \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m3\u001B[39m\n\u001B[1;32m     10\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i, context \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(context_names):\n\u001B[0;32m---> 11\u001B[0m     \u001B[38;5;28;01mawait\u001B[39;00m rnaseq_gen(  \u001B[38;5;66;03m# noqa\u001B[39;00m\n\u001B[1;32m     12\u001B[0m         context_name\u001B[38;5;241m=\u001B[39mcontext,\n\u001B[1;32m     13\u001B[0m         input_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m     14\u001B[0m         input_gene_info_filepath\u001B[38;5;241m=\u001B[39mgene_info_filepath[i],\n\u001B[1;32m     15\u001B[0m         output_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m     16\u001B[0m         prep\u001B[38;5;241m=\u001B[39mRNAPrepMethod\u001B[38;5;241m.\u001B[39mTOTAL,\n\u001B[1;32m     17\u001B[0m         taxon\u001B[38;5;241m=\u001B[39mtaxon_id,\n\u001B[1;32m     18\u001B[0m         input_metadata_filepath\u001B[38;5;241m=\u001B[39mPath(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./data/config_sheets/trna_config.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[1;32m     19\u001B[0m         replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m     20\u001B[0m         high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_replicate_ratio,\n\u001B[1;32m     21\u001B[0m         batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m     22\u001B[0m         high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_batch_ratio,\n\u001B[1;32m     23\u001B[0m         technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m     24\u001B[0m         cutoff\u001B[38;5;241m=\u001B[39mcutoff\n\u001B[1;32m     25\u001B[0m     )\n",
+      "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:808\u001B[0m, in \u001B[0;36mrnaseq_gen\u001B[0;34m(context_name, input_rnaseq_filepath, input_gene_info_filepath, output_rnaseq_filepath, prep, taxon, input_metadata_filepath, input_metadata_df, replicate_ratio, high_replicate_ratio, batch_ratio, high_batch_ratio, technique, cutoff)\u001B[0m\n\u001B[1;32m    805\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mStarting \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcontext_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m    806\u001B[0m output_rnaseq_filepath\u001B[38;5;241m.\u001B[39mparent\u001B[38;5;241m.\u001B[39mmkdir(parents\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, exist_ok\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m--> 808\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m _save_rnaseq_tests(\n\u001B[1;32m    809\u001B[0m     context_name\u001B[38;5;241m=\u001B[39mcontext_name,\n\u001B[1;32m    810\u001B[0m     rnaseq_matrix\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mawait\u001B[39;00m _read_counts(input_rnaseq_filepath),\n\u001B[1;32m    811\u001B[0m     metadata_df\u001B[38;5;241m=\u001B[39minput_metadata_df \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m _create_metadata_df(input_metadata_filepath),\n\u001B[1;32m    812\u001B[0m     gene_info_df\u001B[38;5;241m=\u001B[39mpd\u001B[38;5;241m.\u001B[39mread_csv(input_gene_info_filepath),\n\u001B[1;32m    813\u001B[0m     output_filepath\u001B[38;5;241m=\u001B[39moutput_rnaseq_filepath,\n\u001B[1;32m    814\u001B[0m     prep\u001B[38;5;241m=\u001B[39mprep,\n\u001B[1;32m    815\u001B[0m     taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m    816\u001B[0m     replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m    817\u001B[0m     batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m    818\u001B[0m     high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_replicate_ratio,\n\u001B[1;32m    819\u001B[0m     high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m    820\u001B[0m     technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m    821\u001B[0m     cut_off\u001B[38;5;241m=\u001B[39mcutoff,\n\u001B[1;32m    822\u001B[0m )\n",
+      "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:671\u001B[0m, in \u001B[0;36m_save_rnaseq_tests\u001B[0;34m(context_name, rnaseq_matrix, metadata_df, gene_info_df, output_filepath, prep, taxon, replicate_ratio, batch_ratio, high_replicate_ratio, high_batch_ratio, technique, cut_off)\u001B[0m\n\u001B[1;32m    662\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Save the results of the RNA-Seq tests to a CSV file.\"\"\"\u001B[39;00m\n\u001B[1;32m    663\u001B[0m filtering_options \u001B[38;5;241m=\u001B[39m _FilteringOptions(\n\u001B[1;32m    664\u001B[0m     replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m    665\u001B[0m     batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    668\u001B[0m     high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m    669\u001B[0m )\n\u001B[0;32m--> 671\u001B[0m read_counts_results: _ReadMatrixResults \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m _build_matrix_results(\n\u001B[1;32m    672\u001B[0m     matrix\u001B[38;5;241m=\u001B[39mrnaseq_matrix,\n\u001B[1;32m    673\u001B[0m     gene_info\u001B[38;5;241m=\u001B[39mgene_info_df,\n\u001B[1;32m    674\u001B[0m     metadata_df\u001B[38;5;241m=\u001B[39mmetadata_df,\n\u001B[1;32m    675\u001B[0m     taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m    676\u001B[0m )\n\u001B[1;32m    677\u001B[0m metrics \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mmetrics\n\u001B[1;32m    678\u001B[0m entrez_gene_ids \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mentrez_gene_ids\n",
+      "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:201\u001B[0m, in \u001B[0;36m_build_matrix_results\u001B[0;34m(matrix, gene_info, metadata_df, taxon)\u001B[0m\n\u001B[1;32m    199\u001B[0m gene_info \u001B[38;5;241m=\u001B[39m gene_info_migrations(gene_info)\n\u001B[1;32m    200\u001B[0m \u001B[38;5;28mprint\u001B[39m(matrix)\n\u001B[0;32m--> 201\u001B[0m conversion \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m ensembl_to_gene_id_and_symbol(ids\u001B[38;5;241m=\u001B[39m\u001B[43mmatrix\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mensembl_gene_id\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241m.\u001B[39mtolist(), taxon\u001B[38;5;241m=\u001B[39mtaxon)\n\u001B[1;32m    202\u001B[0m matrix \u001B[38;5;241m=\u001B[39m matrix\u001B[38;5;241m.\u001B[39mmerge(conversion, on\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mensembl_gene_id\u001B[39m\u001B[38;5;124m\"\u001B[39m, how\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mleft\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m    204\u001B[0m \u001B[38;5;66;03m# Only include Entrez and Ensembl Gene IDs that are present in `gene_info`\u001B[39;00m\n",
+      "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m   4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m   4101\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m   4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m   4104\u001B[0m     indexer \u001B[38;5;241m=\u001B[39m [indexer]\n",
+      "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m   3807\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[1;32m   3808\u001B[0m         \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[1;32m   3809\u001B[0m         \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[1;32m   3810\u001B[0m     ):\n\u001B[1;32m   3811\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[0;32m-> 3812\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m   3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m   3814\u001B[0m     \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m   3815\u001B[0m     \u001B[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m   3816\u001B[0m     \u001B[38;5;66;03m#  the TypeError.\u001B[39;00m\n\u001B[1;32m   3817\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n",
+      "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'"
+     ]
+    }
+   ],
    "source": [
-    "# step 2.2 RNA-seq Analysis for Total RNA-seq library preparation\n",
+    "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n",
     "\n",
-    "trnaseq_config_file = \"trnaseq_data_inputs_auto.xlsx\"\n",
-    "rep_ratio = 0.75\n",
-    "group_ratio = 0.75\n",
-    "rep_ratio_h = 1.0\n",
-    "group_ratio_h = 1.0\n",
-    "technique = \"zFPKM\"\n",
-    "minimum_cutoff = -3\n",
-    "taxon_id = \"human\"\n",
-    "\n",
-    "# fmt: off\n",
-    "cmd = \" \".join(\n",
-    "    [\n",
-    "        \"python3\", \"como/rnaseq_gen.py\",\n",
-    "        \"--config-file\", trnaseq_config_file,\n",
-    "        \"--replicate-ratio\", str(rep_ratio),\n",
-    "        \"--batch-ratio\", str(group_ratio),\n",
-    "        \"--high-replicate-ratio\", str(rep_ratio_h),\n",
-    "        \"--high-batch-ratio\", str(group_ratio_h),\n",
-    "        \"--minimum-cutoff\", str(minimum_cutoff),\n",
-    "        \"--filt-technique\", f\"{technique}\",\n",
-    "        \"--library-prep\", \"total\",\n",
-    "        \"--taxon-id\", taxon_id\n",
-    "    ]\n",
-    ")\n",
-    "# fmt: on\n",
-    "\n",
-    "!{cmd}"
+    "replicate_ratio = 0.75\n",
+    "high_confidence_replicate_ratio = 1.0\n",
+    "batch_ratio = 0.75\n",
+    "high_confidence_batch_ratio = 1.0\n",
+    "technique = FilteringTechnique.zfpkm\n",
+    "cutoff = -3\n",
+    "\n",
+    "for i, context in enumerate(context_names):\n",
+    "    await rnaseq_gen(\n",
+    "        context_name=context,\n",
+    "        input_rnaseq_filepath=trna_matrix_filepath[i],\n",
+    "        input_gene_info_filepath=gene_info_filepath[i],\n",
+    "        output_rnaseq_filepath=trna_matrix_filepath[i],\n",
+    "        prep=RNAPrepMethod.TOTAL,\n",
+    "        taxon=taxon_id,\n",
+    "        input_metadata_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n",
+    "        replicate_ratio=replicate_ratio,\n",
+    "        high_replicate_ratio=high_confidence_replicate_ratio,\n",
+    "        batch_ratio=batch_ratio,\n",
+    "        high_batch_ratio=high_confidence_batch_ratio,\n",
+    "        technique=technique,\n",
+    "        cutoff=cutoff\n",
+    "    )"
    ]
   },
   {
@@ -429,33 +494,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "mrnaseq_config_file = \"mrnaseq_data_inputs_auto.xlsx\"\n",
-    "rep_ratio = 0.75\n",
-    "group_ratio = 0.75\n",
-    "rep_ratio_h = 1.0\n",
-    "group_ratio_h = 1.0\n",
-    "technique = \"zfpkm\"\n",
-    "minimum_cutoff = -3\n",
-    "taxon_id = \"human\"\n",
-    "\n",
-    "# fmt: off\n",
-    "cmd = \" \".join(\n",
-    "    [\n",
-    "        \"python3\", \"como/rnaseq_gen.py\",\n",
-    "        \"--config-file\", mrnaseq_config_file,\n",
-    "        \"--replicate-ratio\", str(rep_ratio),\n",
-    "        \"--batch-ratio\", str(group_ratio),\n",
-    "        \"--high-replicate-ratio\", str(rep_ratio_h),\n",
-    "        \"--high-batch-ratio\", str(group_ratio_h),\n",
-    "        \"--minimum-cutoff\", str(minimum_cutoff),\n",
-    "        \"--filt-technique\", f\"{technique}\",\n",
-    "        \"--library-prep\", \"mrna\",\n",
-    "        \"--taxon-id\", taxon_id\n",
-    "    ]\n",
-    ")\n",
-    "# fmt: on\n",
+    "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n",
     "\n",
-    "!{cmd}"
+    "replicate_ratio = 0.75\n",
+    "high_confidence_replicate_ratio = 1.0\n",
+    "batch_ratio = 0.75\n",
+    "high_confidence_batch_ratio = 1.0\n",
+    "technique = FilteringTechnique.zfpkm\n",
+    "cutoff = -3\n",
+    "\n",
+    "for i, context in enumerate(context_names):\n",
+    "    await rnaseq_gen(\n",
+    "        context_name=context,\n",
+    "        input_rnaseq_filepath=polya_matrix_filepath[i],\n",
+    "        input_gene_info_filepath=gene_info_filepath[i],\n",
+    "        output_rnaseq_filepath=polya_matrix_filepath[i],\n",
+    "        prep=RNAPrepMethod.MRNA,\n",
+    "        taxon=taxon_id,\n",
+    "        input_metadata_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n",
+    "        replicate_ratio=replicate_ratio,\n",
+    "        high_replicate_ratio=high_confidence_replicate_ratio,\n",
+    "        batch_ratio=batch_ratio,\n",
+    "        high_batch_ratio=high_confidence_batch_ratio,\n",
+    "        technique=technique,\n",
+    "        cutoff=cutoff\n",
+    "    )"
    ]
   },
   {
@@ -483,33 +546,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "scrnaseq_config_file = \"scrnaseq_data_inputs_auto.xlsx\"\n",
-    "rep_ratio = 0.75\n",
-    "group_ratio = 0.75\n",
-    "rep_ratio_h = 1.0\n",
-    "group_ratio_h = 1.0\n",
-    "quantile = 50\n",
-    "minimum_cutoff = -3\n",
-    "taxon_id = \"human\"\n",
+    "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n",
     "\n",
-    "# fmt: off\n",
-    "cmd = \" \".join(\n",
-    "    [\n",
-    "        \"python3\", \"como/rnaseq_gen.py\",\n",
-    "        \"--config-file\", scrnaseq_config_file,\n",
-    "        \"--replicate-ratio\", str(rep_ratio),\n",
-    "        \"--batch-ratio\", str(group_ratio),\n",
-    "        \"--high-replicate-ratio\", str(rep_ratio_h),\n",
-    "        \"--high-batch-ratio\", str(group_ratio_h),\n",
-    "        \"--minimum-cutoff\", str(minimum_cutoff),\n",
-    "        \"--filt-technique\", \"umi\",\n",
-    "        \"--library-prep\", \"scrna\",\n",
-    "        \"--taxon-id\", taxon_id\n",
-    "    ]\n",
-    ")\n",
-    "# fmt: on\n",
-    "\n",
-    "!{cmd}"
+    "replicate_ratio = 0.75\n",
+    "high_confidence_replicate_ratio = 1.0\n",
+    "batch_ratio = 0.75\n",
+    "high_confidence_batch_ratio = 1.0\n",
+    "technique = FilteringTechnique.umi\n",
+    "cutoff = -3\n",
+    "\n",
+    "for i, context in enumerate(context_names):\n",
+    "    await rnaseq_gen(\n",
+    "        context_name=context,\n",
+    "        input_rnaseq_filepath=scrna_matrix_filepath[i],\n",
+    "        input_gene_info_filepath=gene_info_filepath[i],\n",
+    "        output_rnaseq_filepath=scrna_matrix_filepath[i],\n",
+    "        prep=RNAPrepMethod.SCRNA,\n",
+    "        taxon=taxon_id,\n",
+    "        input_metadata_filepath=Path(\"./data/config_sheets/scrna_config.xlsx\"),\n",
+    "        replicate_ratio=replicate_ratio,\n",
+    "        high_replicate_ratio=high_confidence_replicate_ratio,\n",
+    "        batch_ratio=batch_ratio,\n",
+    "        high_batch_ratio=high_confidence_batch_ratio,\n",
+    "        technique=technique,\n",
+    "        cutoff=cutoff\n",
+    "    )"
    ]
   },
   {
@@ -683,8 +744,8 @@
     "    [\n",
     "        \"python3\", \"como/merge_xomics.py\",\n",
     "        \"--merge-zfpkm-distribution\",\n",
-    "        \"--total-rnaseq-config-file\", trnaseq_config_file,\n",
-    "        \"--mrnaseq-config-file\", mrnaseq_config_file,\n",
+    "        \"--total-rnaseq-config-file\", trnaseq_config_file,  # noqa: F821\n",
+    "        \"--mrnaseq-config-file\", mrnaseq_config_file,  # noqa: F821\n",
     "        # \"--scrnaseq-config-file\", scrnaseq_config_file,  # If using single-cell data, uncomment the start of this line\n",
     "        # \"--proteomics-config-file\", proteomics_config_file,  # If using proteomics data, uncomment the start of this line\n",
     "        \"--requirement-adjust\", requirement_adjust,\n",
diff --git a/main/como/__init__.py b/main/como/__init__.py
index 05100d09..c6ee72e2 100644
--- a/main/como/__init__.py
+++ b/main/como/__init__.py
@@ -4,7 +4,7 @@
 from como.utils import stringlist_to_list
 
 __all__ = ["stringlist_to_list", "Config"]
-__version__ = "1.10.0"
+__version__ = "1.11.1"
 
 
 def return_placeholder_data() -> pd.DataFrame:
diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index 373dab92..f6ac8b91 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -308,9 +308,16 @@ def _build_with_imat(
     expr_vector: npt.NDArray,
     expr_thesh: tuple[float, float],
     force_gene_ids: Sequence[int],
+    solver: str,
 ) -> (cobra.Model, pd.DataFrame):
     expr_vector = np.array(expr_vector)
-    properties = IMATProperties(exp_vector=expr_vector, exp_thresholds=expr_thesh, core=force_gene_ids, epsilon=0.01)
+    properties = IMATProperties(
+        exp_vector=expr_vector,
+        exp_thresholds=expr_thesh,
+        core=force_gene_ids,
+        epsilon=0.01,
+        solver=solver.upper(),
+    )
     algorithm = IMAT(s_matrix, np.array(lb), np.array(ub), properties)
     context_rxns: npt.NDArray = algorithm.run()
     fluxes: pd.Series = algorithm.sol.to_series()
@@ -507,7 +514,14 @@ def _build_model(  # noqa: C901
     elif recon_algorithm == Algorithm.IMAT:
         context_model_cobra: cobra.Model
         context_model_cobra, flux_df = _build_with_imat(
-            reference_model, s_matrix, lb, ub, expr_vector, exp_thresh, idx_force
+            reference_model,
+            s_matrix,
+            lb,
+            ub,
+            expr_vector,
+            exp_thresh,
+            idx_force,
+            solver=solver,
         )
         imat_reactions = flux_df.rxn
         model_reactions = [reaction.id for reaction in context_model_cobra.reactions]
@@ -550,6 +564,7 @@ def _collect_boundary_reactions(path: Path) -> _BoundaryReactions:
     df = _create_df(path)
     for column in df.columns:
         if column not in [
+            "boundary",
             "reaction",
             "abbreviation",
             "compartment",
@@ -561,31 +576,25 @@ def _collect_boundary_reactions(path: Path) -> _BoundaryReactions:
                 f"'Minimum Reaction Rate', and 'Maximum Reaction Rate'. Found: {column}"
             )
 
-    reactions: list[str] = []
+    reactions: list[str] = [""] * len(df)
     boundary_type: list[str] = df["reaction"].tolist()
     reaction_abbreviation: list[str] = df["abbreviation"].tolist()
     reaction_compartment: list[str] = df["compartment"].tolist()
-    lower_bounds = df["minimum reaction rate"].tolist()
-    upper_bounds = df["maximum reaction rate"].tolist()
+    lower_bound = df["minimum reaction rate"].tolist()
+    upper_bound = df["maximum reaction rate"].tolist()
+    boundary_map = {"exchange": "EX", "demand": "DM", "sink": "SK"}
     for i in range(len(boundary_type)):
-        current_type: str = boundary_type[i]
-        temp_reaction: str = ""
-
-        match current_type.lower():
-            case "exchange":
-                temp_reaction += "EX_"
-            case "demand":
-                temp_reaction += "DM_"
-            case "sink":
-                temp_reaction += "SK_"
+        boundary: str = boundary_type[i].lower()
+        if boundary not in boundary_map:
+            raise ValueError(f"Boundary reaction type must be 'Exchange', 'Demand', or 'Sink'. Found: {boundary[i]}")
 
         shorthand_compartment = Compartments.get(reaction_compartment[i])
-        temp_reaction += f"{reaction_abbreviation[i]}[{shorthand_compartment}]"
-        reactions.append(temp_reaction)
+        reactions[i] = f"{boundary_map.get(boundary)}_{reaction_abbreviation[i]}[{shorthand_compartment}]"
+
     return _BoundaryReactions(
         reactions=reactions,
-        lower_bounds=lower_bounds,
-        upper_bounds=upper_bounds,
+        lower_bounds=lower_bound,
+        upper_bounds=upper_bound,
     )
 
 
@@ -631,10 +640,10 @@ def create_context_specific_model(  # noqa: C901
             raise ValueError(f"Output file type {output_type} not recognized. Must be one of: 'xml', 'mat', 'json'")
 
     if algorithm not in Algorithm:
-        raise ValueError(f"Algorithm {algorithm} not supported. Please use one of: GIMME, FASTCORE, or IMAT")
+        raise ValueError(f"Algorithm {algorithm} not supported. Use one of {', '.join(a.value for a in Algorithm)}")
 
     if solver not in Solver:
-        raise ValueError(f"Solver '{solver}' not supported. Use 'GLPK' or 'GUROBI'")
+        raise ValueError(f"Solver '{solver}' not supported. Use one of {', '.join(s.value for s in Solver)}")
 
     if boundary_rxns_filepath:
         boundary_reactions = _collect_boundary_reactions(boundary_rxns_filepath)
@@ -667,7 +676,7 @@ def create_context_specific_model(  # noqa: C901
         bound_ub=boundary_reactions.upper_bounds,
         exclude_rxns=exclude_rxns,
         force_rxns=force_rxns,
-        solver=solver.value,
+        solver=solver.value.lower(),
         low_thresh=low_threshold,
         high_thresh=high_threshold,
     )
diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py
deleted file mode 100644
index cf45f39c..00000000
--- a/main/como/rnaseq.py
+++ /dev/null
@@ -1,764 +0,0 @@
-from __future__ import annotations
-
-import gc
-import math
-import multiprocessing
-import time
-from collections import namedtuple
-from dataclasses import dataclass, field
-from enum import Enum
-from functools import partial
-from multiprocessing.pool import Pool
-from pathlib import Path
-from typing import Callable, NamedTuple
-
-import numpy as np
-import numpy.typing as npt
-import pandas as pd
-import plotly.graph_objs as go
-import scanpy as sc
-import sklearn
-import sklearn.neighbors
-from fast_bioservices import Taxon
-from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol
-from loguru import logger
-from pandas import DataFrame
-from plotly.subplots import make_subplots
-from scipy.signal import find_peaks
-from sklearn.neighbors import KernelDensity
-
-from como.migrations import gene_info_migrations
-from como.types import RNAPrepMethod
-from como.utils import convert_gene_data
-
-
-class _FilteringOptions(NamedTuple):
-    replicate_ratio: float
-    batch_ratio: float
-    cut_off: float
-    high_replicate_ratio: float
-    high_batch_ratio: float
-
-
-class FilteringTechnique(Enum):
-    """RNA sequencing filtering capabilities."""
-
-    cpm = "cpm"
-    zfpkm = "zfpkm"
-    tpm = "quantile"
-    umi = "umi"
-
-    @staticmethod
-    def from_string(value: str) -> FilteringTechnique:
-        """Create a filtering technique object from a string."""
-        match value.lower():
-            case "cpm":
-                return FilteringTechnique.cpm
-            case "zfpkm":
-                return FilteringTechnique.zfpkm
-            case "quantile":
-                return FilteringTechnique.tpm
-            case "umi":
-                return FilteringTechnique.umi
-            case _:
-                possible_values = [t.value for t in FilteringTechnique]
-                raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}")
-
-
-class LayoutMethod(Enum):
-    """RNA sequencing layout method."""
-
-    paired_end = "paired-end"
-    single_end = "single-end"
-
-
-@dataclass
-class _StudyMetrics:
-    study: str
-    num_samples: int
-    count_matrix: pd.DataFrame
-    fragment_lengths: npt.NDArray[np.float32]
-    sample_names: list[str]
-    layout: list[LayoutMethod]
-    entrez_gene_ids: list[str]
-    gene_sizes: npt.NDArray[np.float32]
-    __normalization_matrix: pd.DataFrame = field(default_factory=pd.DataFrame)
-    __z_score_matrix: pd.DataFrame = field(default_factory=pd.DataFrame)
-    __high_confidence_entrez_gene_ids: list[str] = field(default=list)
-
-    def __post_init__(self):
-        for layout in self.layout:
-            if layout not in LayoutMethod:
-                raise ValueError(f"Layout must be 'paired-end' or 'single-end'; got: {layout}")
-
-    @property
-    def normalization_matrix(self) -> pd.DataFrame:
-        return self.__normalization_matrix
-
-    @normalization_matrix.setter
-    def normalization_matrix(self, value: pd.DataFrame) -> None:
-        self.__normalization_matrix = value
-
-    @property
-    def z_score_matrix(self) -> pd.DataFrame:
-        return self.__z_score_matrix
-
-    @z_score_matrix.setter
-    def z_score_matrix(self, value: pd.DataFrame) -> None:
-        self.__z_score_matrix = value
-
-    @property
-    def high_confidence_entrez_gene_ids(self) -> list[str]:
-        return self.__high_confidence_entrez_gene_ids
-
-    @high_confidence_entrez_gene_ids.setter
-    def high_confidence_entrez_gene_ids(self, values: list[str]) -> None:
-        self.__high_confidence_entrez_gene_ids = values
-
-
-Density = namedtuple("Density", ["x", "y"])
-
-
-class _ZFPKMResult(NamedTuple):
-    zfpkm: pd.Series
-    density: Density
-    mu: float
-    std_dev: float
-    max_fpkm: float
-
-
-class _ReadMatrixResults(NamedTuple):
-    metrics: dict[str, _StudyMetrics]
-    entrez_gene_ids: list[str]
-
-
-NamedMetrics = dict[str, _StudyMetrics]
-
-
-def k_over_a(k: int, a: float) -> Callable[[npt.NDArray], bool]:
-    """Return a function that filters rows of an array based on the sum of elements being greater than or equal to A at least k times.
-
-    This code is based on the `kOverA` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/kOverA
-
-    :param k: The minimum number of times the sum of elements must be greater than or equal to A.
-    :param a: The value to compare the sum of elements to.
-    :return: A function that accepts a NumPy array to perform the actual filtering
-    """  # noqa: E501
-
-    def filter_func(row: npt.NDArray) -> bool:
-        return np.sum(row >= a) >= k
-
-    return filter_func
-
-
-def genefilter(data: pd.DataFrame | npt.NDArray, filter_func: Callable[[npt.NDArray], bool]) -> npt.NDArray:
-    """Apply a filter function to the rows of the data and return the filtered array.
-
-    This code is based on the `genefilter` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/genefilter
-
-    :param data: The data to filter
-    :param filter_func: THe function to filter the data by
-    :return: A NumPy array of the filtered data.
-    """
-    match type(data):
-        case pd.DataFrame:
-            return data.apply(filter_func, axis=1).values
-        case npt.NDArray:
-            return np.apply_along_axis(filter_func, axis=1, arr=data)
-        case _:
-            raise ValueError("Unsupported data type. Must be a Pandas DataFrame or a NumPy array.")
-
-
-async def _read_counts_matrix(
-    *,
-    context_name: str,
-    counts_matrix_filepath: Path,
-    config_filepath: Path,
-    gene_info_filepath: Path,
-    taxon_id: Taxon,
-) -> _ReadMatrixResults:
-    """Read the counts matrix and returns the results.
-
-    :param context_name: The context name being processed. Usually a cell type, but can be any string
-    :param counts_matrix_filepath: The file path to the gene count matrix
-    :param config_filepath: The file path to the Excel configuration file
-    :param gene_info_filepath: The file path to gene information generated by `rnaseq_preprocess.py`
-    :param taxon_id: The NCBI Taxon ID
-    :return: A dataclass `ReadMatrixResults`
-    """
-    logger.trace(f"Reading config_filepath at '{config_filepath}'")
-    config_df: pd.DataFrame = pd.read_excel(config_filepath, sheet_name=context_name, header=0)
-    gene_info: pd.DataFrame = pd.read_csv(gene_info_filepath)
-    gene_info = gene_info[gene_info["ensembl_gene_id"] != "-"].reset_index(drop=True)
-    gene_info = gene_info_migrations(gene_info)
-
-    match counts_matrix_filepath.suffix:
-        case ".csv":
-            logger.debug(f"Reading CSV file at '{counts_matrix_filepath}'")
-            counts_matrix: pd.DataFrame = pd.read_csv(counts_matrix_filepath, header=0)
-            if "ensembl_gene_id" not in counts_matrix.columns:
-                raise ValueError(
-                    f"Counts matrix must contain a column named 'ensembl_gene_id'. "
-                    f"Ensure the file '{counts_matrix_filepath}' contains this column."
-                )
-            conversion = await ensembl_to_gene_id_and_symbol(
-                ids=counts_matrix["ensembl_gene_id"].tolist(), taxon=taxon_id
-            )
-            counts_matrix = counts_matrix.merge(conversion, on="ensembl_gene_id", how="left")
-
-        case ".h5ad":
-            logger.debug(f"Reading h5ad file at '{counts_matrix_filepath}'")
-            adata: sc.AnnData = sc.read_h5ad(counts_matrix_filepath)
-            counts_matrix: pd.DataFrame = adata.to_df().T  # Make sample names the columns and gene data the index
-
-            # Coherce the incoming gene data (i.e., Gene Symbols) into Entrez and Ensembl Gene IDs
-            conversion = await convert_gene_data(counts_matrix.index.tolist(), taxon_id)
-            counts_matrix.index.name = conversion.index.name
-            counts_matrix = counts_matrix.merge(conversion, left_index=True, right_index=True)
-            counts_matrix = counts_matrix[counts_matrix["entrez_gene_id"] != "-"]
-            counts_matrix.reset_index(inplace=True)
-
-            # explicit garbage collection because this function runs for a little while
-            del adata
-            del conversion
-            gc.collect()
-
-        case _:
-            raise ValueError(
-                f"Unknown file extension '{counts_matrix_filepath.suffix}'. Valid options are '.csv' or '.h5ad'."
-            )
-
-    if counts_matrix.empty:
-        raise ValueError(
-            f"Counts matrix is empty. Ensure the file contains data. "
-            f"Attempted to process file '{counts_matrix_filepath}'"
-        )
-
-    # Only include Entrez and Ensembl Gene IDs that are present in `gene_info`
-    counts_matrix["entrez_gene_id"] = counts_matrix["entrez_gene_id"].str.split("//")
-    counts_matrix = counts_matrix.explode("entrez_gene_id")
-    counts_matrix = counts_matrix.replace(to_replace="-", value=pd.NA).dropna()
-    counts_matrix["entrez_gene_id"] = counts_matrix["entrez_gene_id"].astype(int)
-
-    gene_info = gene_info.replace(to_replace="-", value=pd.NA).dropna()
-    gene_info["entrez_gene_id"] = gene_info["entrez_gene_id"].astype(int)
-
-    counts_matrix = counts_matrix.merge(
-        gene_info[["entrez_gene_id", "ensembl_gene_id"]],
-        on=["entrez_gene_id", "ensembl_gene_id"],
-        how="inner",
-    )
-    gene_info = gene_info.merge(
-        counts_matrix[["entrez_gene_id", "ensembl_gene_id"]],
-        on=["entrez_gene_id", "ensembl_gene_id"],
-        how="inner",
-    )
-
-    entrez_gene_ids: list[str] = gene_info["entrez_gene_id"].tolist()
-    metrics: NamedMetrics = {}
-    for study in config_df["study"].unique().tolist():
-        study_sample_names = config_df[config_df["study"] == study]["sample_name"].tolist()
-        layouts = config_df[config_df["study"] == study]["layout"].tolist()
-        metrics[study] = _StudyMetrics(
-            count_matrix=counts_matrix[counts_matrix.columns.intersection(study_sample_names)],
-            fragment_lengths=config_df[config_df["study"] == study]["fragment_length"].values,
-            sample_names=study_sample_names,
-            layout=[LayoutMethod(layout) for layout in layouts],
-            num_samples=len(study_sample_names),
-            entrez_gene_ids=entrez_gene_ids,
-            gene_sizes=np.array(gene_info["size"].values).astype(np.float32),
-            study=study,
-        )
-        metrics[study].fragment_lengths[np.isnan(metrics[study].fragment_lengths)] = 0
-        metrics[study].count_matrix.index = pd.Index(entrez_gene_ids, name="entrez_gene_id")
-
-    return _ReadMatrixResults(metrics=metrics, entrez_gene_ids=gene_info["entrez_gene_id"].tolist())
-
-
-def calculate_tpm(metrics: NamedMetrics) -> NamedMetrics:
-    """Calculate the Transcripts Per Million (TPM) for each sample in the metrics dictionary."""
-    for sample in metrics:
-        count_matrix = metrics[sample].count_matrix
-
-        gene_sizes = metrics[sample].gene_sizes
-
-        tpm_matrix = pd.DataFrame(data=None, index=count_matrix.index, columns=count_matrix.columns)
-        for i in range(len(count_matrix.columns)):
-            values: pd.Series = count_matrix.iloc[:, i] + 1  # Add 1 to prevent division by 0
-            rate = np.log(values.tolist()) - np.log(gene_sizes)
-            denominator = np.log(np.sum(np.exp(rate)))
-            tpm_value = np.exp(rate - denominator + np.log(1e6))
-            tpm_matrix.iloc[:, i] = tpm_value
-        metrics[sample].normalization_matrix = tpm_matrix
-
-    return metrics
-
-
-def calculate_fpkm(metrics: NamedMetrics) -> NamedMetrics:
-    """Calculate the Fragments Per Kilobase of transcript per Million mapped reads (FPKM) for each sample in the metrics dictionary."""  # noqa: E501
-    matrix_values = []
-    for study in metrics:
-        for sample in range(metrics[study].num_samples):
-            layout = metrics[study].layout[sample]
-            count_matrix: npt.NDArray = metrics[study].count_matrix.iloc[:, sample].values
-            gene_size = metrics[study].gene_sizes
-
-            count_matrix = count_matrix.astype(np.float32)
-            gene_size = gene_size.astype(np.float32)
-
-            match layout:
-                case LayoutMethod.paired_end:  # FPKM
-                    mean_fragment_lengths = metrics[study].fragment_lengths[sample]
-                    # Ensure non-negative value
-                    effective_length = [max(0, size - (mean_fragment_lengths + 1)) for size in gene_size]
-                    n = count_matrix.sum()
-                    fpkm = ((count_matrix + 1) * 1e9) / (np.array(effective_length) * n)
-                    matrix_values.append(fpkm)
-                case LayoutMethod.single_end:  # RPKM
-                    # Add a pseudocount before log to ensure log(0) does not happen
-                    rate = np.log(count_matrix + 1) - np.log(gene_size)
-                    exp_rate = np.exp(rate - np.log(np.sum(count_matrix)) + np.log(1e9))
-                    matrix_values.append(exp_rate)
-                case _:
-                    raise ValueError("Invalid normalization method specified")
-
-        fpkm_matrix = pd.DataFrame(matrix_values).T  # Transpose is needed because values were appended as rows
-        fpkm_matrix = fpkm_matrix[~pd.isna(fpkm_matrix)]
-        metrics[study].normalization_matrix = fpkm_matrix
-
-        metrics[study].normalization_matrix.columns = metrics[study].count_matrix.columns
-
-    return metrics
-
-
-def _zfpkm_calculation(col: pd.Series, kernel: KernelDensity, peak_parameters: tuple[float, float]) -> _ZFPKMResult:
-    """Log2 Transformations.
-
-    Stabilize the variance in the data to make the distribution more symmetric; this is helpful for Gaussian fitting
-
-    Kernel Density Estimation (kde)
-        - Non-parametric method to estimate the probability density function (PDF) of a random variable
-        - Estimates the distribution of log2-transformed FPKM values
-        - Bandwidth parameter controls the smoothness of the density estimate
-        - KDE Explanation
-            - A way to smooth a histogram to get a better idea of the underlying distribution of the data
-            - Given a set of data points, we want to understand how they are distributed.
-                Histograms can be useful, but are sensitive to bin size and number
-            - The KDE places a "kernel" - a small symmetric function (i.e., Gaussian curve) - at each data point
-            - The "kernel" acts as a weight, giving more weight to points closer to the center of the kernel,
-                and less weight to points farther away
-            - Kernel functions are summed along each point on the x-axis
-            - A smooth curve is created that represents the estimated density of the data
-
-    Peak Finding
-        - Identifies that are above a certain height and separated by a minimum distance
-        - Represent potential local maxima in the distribution
-
-    Peak Selection
-        - The peak with the highest x-value (from log2-FPKM) is chosen as the mean (mu)
-            of the "inactive" gene distribution
-        - The peak representing unexpressed or inactive genes should be at a lower FPKM
-            value compared to the peak representing expressed genes
-
-    Standard Deviation Estimation
-         - The mean of log2-FPKM values are greater than the calculated mu
-         - Standard deviation is estimated based on the assumption that the right tail of the distribution
-            This represents expressed genes) can be approximated by a half-normal distribution
-
-     zFPKM Transformation
-        - Centers disbribution around 0 and scales it by the standard deviation.
-            This makes it easier to compare gene expression across different samples
-        - Represents the number of standard deviations away from the mean of the "inactive" gene distribution
-        - Higher zFPKM values indicate higher expression levels relative to the "inactive" peak
-        - A zFPKM value of 0 represents the mean of the "inactive" distribution
-        - Research shows that a zFPKM value of -3 or greater can be used as
-            a threshold for calling a gene as "expressed"
-            : https://doi.org/10.1186/1471-2164-14-778
-    """
-    col_log2: npt.NDArray = np.log2(col + 1)
-    col_log2 = np.nan_to_num(col_log2, nan=0)
-    refit: KernelDensity = kernel.fit(col_log2.reshape(-1, 1))  # type: ignore
-
-    # kde: KernelDensity = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(col_log2.reshape(-1, 1))
-    x_range = np.linspace(col_log2.min(), col_log2.max(), 1000)
-    density = np.exp(refit.score_samples(x_range.reshape(-1, 1)))
-    peaks, _ = find_peaks(density, height=peak_parameters[0], distance=peak_parameters[1])
-    peak_positions = x_range[peaks]
-
-    mu = 0
-    max_fpkm = 0
-    stddev = 1
-
-    if len(peaks) != 0:
-        mu = peak_positions.max()
-        max_fpkm = density[peaks[np.argmax(peak_positions)]]
-        u = col_log2[col_log2 > mu].mean()
-        stddev = (u - mu) * np.sqrt(np.pi / 2)
-    zfpkm = pd.Series((col_log2 - mu) / stddev, dtype=np.float32, name=col.name)
-
-    return _ZFPKMResult(zfpkm=zfpkm, density=Density(x_range, density), mu=mu, std_dev=stddev, max_fpkm=max_fpkm)
-
-
-def zfpkm_transform(
-    fpkm_df: pd.DataFrame,
-    bandwidth: int = 0.5,
-    peak_parameters: tuple[float, float] = (0.02, 1.0),
-    update_every_percent: float = 0.1,
-) -> tuple[dict[str, _ZFPKMResult], DataFrame]:
-    """Perform zFPKM calculation/transformation."""
-    if update_every_percent > 1:
-        logger.warning(
-            f"update_every_percent should be a decimal value between 0 and 1; got: {update_every_percent} - "
-            f"will convert to percentage"
-        )
-        update_every_percent /= 100
-
-    total = len(fpkm_df.columns)
-    update_per_step: int = int(np.ceil(total * update_every_percent))
-    cores = multiprocessing.cpu_count() - 2
-    logger.debug(f"Processing {total:,} samples through zFPKM transform using {cores} cores")
-    logger.debug(
-        f"Will update every {update_per_step:,} steps as this is approximately "
-        f"{update_every_percent:.1%} of {total:,}"
-    )
-
-    with Pool(processes=cores) as pool:
-        kernel = KernelDensity(kernel="gaussian", bandwidth=bandwidth)
-        chunksize = int(math.ceil(len(fpkm_df.columns) / (4 * cores)))
-        partial_func = partial(_zfpkm_calculation, kernel=kernel, peak_parameters=peak_parameters)
-        chunk_time = time.time()
-        start_time = time.time()
-
-        log_padding = len(str(f"{total:,}"))
-        zfpkm_df = pd.DataFrame(data=0, index=fpkm_df.index, columns=fpkm_df.columns)
-        results: dict[str, _ZFPKMResult] = {}
-        result: _ZFPKMResult
-        for i, result in enumerate(
-            pool.imap(
-                partial_func,
-                (fpkm_df[col] for col in fpkm_df.columns),
-                chunksize=chunksize,
-            )
-        ):
-            key = str(result.zfpkm.name)
-            results[key] = result
-            zfpkm_df[key] = result.zfpkm
-
-            # show updates every X% and at the end, but skip on first iteration
-            if i != 0 and (i % update_per_step == 0 or i == total):
-                current_time = time.time()
-                chunk = current_time - chunk_time
-                total_time = current_time - start_time
-                formatted = f"{i:,}"
-                logger.debug(
-                    f"Processed {formatted:>{log_padding}} of {total:,} - "
-                    f"chunk took {chunk:.1f} seconds - "
-                    f"running for {total_time:.1f} seconds"
-                )
-                chunk_time = current_time
-    return results, zfpkm_df
-
-
-def zfpkm_plot(results, *, plot_xfloor: int = -4, subplot_titles: bool = True):
-    """Plot the log2(FPKM) density and fitted Gaussian for each sample.
-
-    :param results: A dictionary of intermediate results from zfpkm_transform.
-    :param: subplot_titles: Whether to display facet titles (sample names).
-    :param plot_xfloor: Lower limit for the x-axis.
-    :param subplot_titles: Whether to display facet titles (sample names).
-    """
-    mega_df = pd.DataFrame(columns=["sample_name", "log2fpkm", "fpkm_density", "fitted_density_scaled"])
-    for name, result in results.items():
-        stddev = result.std_dev
-        x = np.array(result.density.x)
-        y = np.array(result.density.y)
-
-        fitted = np.exp(-0.5 * ((x - result.mu) / stddev) ** 2) / (stddev * np.sqrt(2 * np.pi))
-        max_fpkm = y.max()
-        max_fitted = fitted.max()
-        scale_fitted = fitted * (max_fpkm / max_fitted)
-
-        df = pd.DataFrame(
-            {
-                "sample_name": [name] * len(x),
-                "log2fpkm": x,
-                "fpkm_density": y,
-                "fitted_density_scaled": scale_fitted,
-            }
-        )
-        mega_df = pd.concat([mega_df, df], ignore_index=True)
-
-    mega_df = mega_df.melt(id_vars=["log2fpkm", "sample_name"], var_name="source", value_name="density")
-    subplot_titles = list(results.keys()) if subplot_titles else None
-    fig = make_subplots(
-        rows=len(results),
-        cols=1,
-        subplot_titles=subplot_titles,
-        vertical_spacing=min(0.05, (1 / (len(results) - 1))),
-    )
-
-    for i, (name, group) in enumerate(mega_df.groupby("sample_name"), start=1):
-        fig.add_trace(
-            trace=go.Scatter(x=group["log2fpkm"], y=group["density"], mode="lines", name=name, legendgroup=name),
-            row=i,
-            col=1,
-        )
-        fig.update_xaxes(title_text="log2(FPKM)", range=[plot_xfloor, max(group["log2fpkm"].tolist())], row=i, col=1)
-        fig.update_yaxes(title_text="density [scaled]", row=i, col=1)
-        fig.update_layout(legend_tracegroupgap=0)
-
-    fig.update_layout(height=600 * len(results), width=1000, title_text="zFPKM Plots", showlegend=True)
-    fig.write_image("zfpkm_plot.png")
-
-
-def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics:
-    """Calculate the z-score for each sample in the metrics dictionary."""
-    for sample in metrics:
-        log_matrix = np.log(metrics[sample].normalization_matrix)
-        z_matrix = pd.DataFrame(
-            data=sklearn.preprocessing.scale(log_matrix, axis=1), columns=metrics[sample].sample_names
-        )
-        metrics[sample].z_score_matrix = z_matrix
-    return metrics
-
-
-def cpm_filter(
-    *,
-    metrics: NamedMetrics,
-    filtering_options: _FilteringOptions,
-    output_csv_filepath: Path,
-) -> NamedMetrics:
-    """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample."""
-    n_exp = filtering_options.replicate_ratio
-    n_top = filtering_options.high_replicate_ratio
-    cut_off = filtering_options.cut_off
-
-    metric: _StudyMetrics
-    for metric in metrics.values():
-        counts: pd.DataFrame = metric.count_matrix
-        entrez_ids: list[str] = metric.entrez_gene_ids
-        library_size: pd.DataFrame = counts.sum(axis=1)
-
-        # For library_sizes equal to 0, add 1 to prevent divide by 0
-        # This will not impact the final counts per million calculation because the original counts are still 0
-        #   thus, (0 / 1) * 1_000_000 = 0
-        library_size[library_size == 0] = 1
-
-        output_csv_filepath.parent.mkdir(parents=True, exist_ok=True)
-        counts_per_million: pd.DataFrame = (counts / library_size) * 1_000_000
-        counts_per_million.insert(0, "entrez_gene_ids", pd.Series(entrez_ids))
-        logger.debug(f"Writing CPM matrix to {output_csv_filepath}")
-        counts_per_million.to_csv(output_csv_filepath, index=False)
-
-        # TODO: Counts per million is adding ~61,500 columns (equal to the number of genes) for some reason.
-        #  Most likely due to multiplying by 1_000_000, not exactly sure why
-
-        min_samples = round(n_exp * len(counts.columns))  # noqa: F841
-        top_samples = round(n_top * len(counts.columns))  # noqa: F841
-        test_bools = pd.DataFrame({"entrez_gene_ids": entrez_ids})
-        for i in range(len(counts_per_million.columns)):
-            cutoff = (
-                10e6 / (np.median(np.sum(counts[:, i])))
-                if cut_off == "default"
-                else 1e6 * cut_off / np.median(np.sum(counts[:, i]))
-            )
-            test_bools = test_bools.merge(counts_per_million[counts_per_million.iloc[:, i] > cutoff])
-
-    return metrics
-
-
-def tpm_quantile_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions) -> NamedMetrics:
-    """Apply quantile-based filtering to the TPM matrix for a given sample."""
-    # TODO: Write the TPM matrix to disk
-
-    n_exp = filtering_options.replicate_ratio
-    n_top = filtering_options.high_replicate_ratio
-    cut_off = filtering_options.cut_off
-    metrics = calculate_tpm(metrics)
-
-    sample: str
-    metric: _StudyMetrics
-    for sample, metric in metrics.items():
-        entrez_ids = metric.entrez_gene_ids
-        gene_size = metric.gene_sizes
-        tpm_matrix: pd.DataFrame = metric.normalization_matrix
-
-        min_samples = round(n_exp * len(tpm_matrix.columns))
-        top_samples = round(n_top * len(tpm_matrix.columns))
-
-        tpm_quantile = tpm_matrix[tpm_matrix > 0]
-        quantile_cutoff = np.quantile(
-            a=tpm_quantile.values, q=1 - (cut_off / 100), axis=0
-        )  # Compute quantile across columns
-        boolean_expression = pd.DataFrame(
-            data=tpm_matrix > quantile_cutoff, index=tpm_matrix.index, columns=tpm_matrix.columns
-        ).astype(int)
-
-        min_func = k_over_a(min_samples, 0.9)
-        top_func = k_over_a(top_samples, 0.9)
-
-        min_genes: npt.NDArray[bool] = genefilter(boolean_expression, min_func)
-        top_genes: npt.NDArray[bool] = genefilter(boolean_expression, top_func)
-
-        # Only keep `entrez_gene_ids` that pass `min_genes`
-        metric.entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, min_genes) if keep]
-        metric.gene_sizes = [gene for gene, keep in zip(gene_size, min_genes) if keep]
-        metric.count_matrix = metric.count_matrix.iloc[min_genes, :]
-        metric.normalization_matrix = metrics[sample].normalization_matrix.iloc[min_genes, :]
-
-        keep_top_genes = [gene for gene, keep in zip(entrez_ids, top_genes) if keep]
-        metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, keep_top_genes) if keep]
-
-    metrics = calculate_z_score(metrics)
-
-    return metrics
-
-
-def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics:
-    """Apply zFPKM filtering to the FPKM matrix for a given sample."""
-    min_sample_expression = filtering_options.replicate_ratio
-    high_confidence_sample_expression = filtering_options.high_replicate_ratio
-    cut_off = filtering_options.cut_off
-
-    if calcualte_fpkm:
-        metrics = calculate_fpkm(metrics)
-
-    metric: _StudyMetrics
-    for metric in metrics.values():
-        # if fpkm was not calculated, the normalization matrix will be empty; collect the count matrix instead
-        matrix = metric.count_matrix if metric.normalization_matrix.empty else metric.normalization_matrix
-        matrix = matrix[matrix.sum(axis=1) > 0]
-
-        minimums = matrix == 0
-        results, zfpkm_df = zfpkm_transform(matrix)
-        zfpkm_df[minimums] = -4
-        zfpkm_plot(results)
-
-        # determine which genes are expressed
-        min_samples = round(min_sample_expression * len(zfpkm_df.columns))
-        min_func = k_over_a(min_samples, cut_off)
-        min_genes: npt.NDArray[bool] = genefilter(zfpkm_df, min_func)
-        metric.entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, min_genes) if keep]
-
-        # determine which genes are confidently expressed
-        top_samples = round(high_confidence_sample_expression * len(zfpkm_df.columns))
-        top_func = k_over_a(top_samples, cut_off)
-        top_genes: npt.NDArray[bool] = genefilter(zfpkm_df, top_func)
-        metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, top_genes) if keep]
-
-    return metrics
-
-
-def filter_counts(
-    *,
-    metrics: NamedMetrics,
-    technique: FilteringTechnique,
-    filtering_options: _FilteringOptions,
-    cpm_output_filepath: Path | None = None,
-) -> NamedMetrics:
-    """Filter the count matrix based on the specified technique."""
-    match technique:
-        case FilteringTechnique.cpm:
-            if cpm_output_filepath is None:
-                raise ValueError("CPM output filepath must be provided")
-            return cpm_filter(
-                metrics=metrics,
-                filtering_options=filtering_options,
-                output_csv_filepath=cpm_output_filepath,
-            )
-
-        case FilteringTechnique.tpm:
-            return tpm_quantile_filter(metrics=metrics, filtering_options=filtering_options)
-
-        case FilteringTechnique.zfpkm:
-            return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=True)
-
-        case FilteringTechnique.umi:
-            return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=False)
-
-        case _:
-            raise ValueError(f"Technique must be one of {FilteringTechnique}")
-
-
-async def save_rnaseq_tests(
-    context_name: str,
-    counts_matrix_filepath: Path,
-    config_filepath: Path,
-    gene_info_filepath: Path,
-    output_filepath: Path,
-    prep: RNAPrepMethod,
-    taxon_id: Taxon,
-    replicate_ratio: float,
-    batch_ratio: float,
-    high_replicate_ratio: float,
-    high_batch_ratio: float,
-    technique: FilteringTechnique,
-    cut_off: int | float,
-):
-    """Save the results of the RNA-Seq tests to a CSV file."""
-    filtering_options = _FilteringOptions(
-        replicate_ratio=replicate_ratio,
-        batch_ratio=batch_ratio,
-        cut_off=cut_off,
-        high_replicate_ratio=high_replicate_ratio,
-        high_batch_ratio=high_batch_ratio,
-    )
-
-    if prep == RNAPrepMethod.SCRNA:
-        technique = FilteringTechnique.umi
-        logger.warning(
-            "Single cell filtration does not normalize and assumes "
-            "gene counts are counted with Unique Molecular Identifiers (UMIs). "
-            "Setting filtering technique to UMI now."
-        )
-
-    read_counts_results: _ReadMatrixResults = await _read_counts_matrix(
-        context_name=context_name,
-        counts_matrix_filepath=counts_matrix_filepath,
-        config_filepath=config_filepath,
-        gene_info_filepath=gene_info_filepath,
-        taxon_id=taxon_id,
-    )
-    metrics = read_counts_results.metrics
-    entrez_gene_ids = read_counts_results.entrez_gene_ids
-
-    metrics = filter_counts(
-        metrics=metrics,
-        technique=technique,
-        filtering_options=filtering_options,
-    )
-
-    expressed_genes: list[str] = []
-    top_genes: list[str] = []
-    for metric in metrics.values():
-        expressed_genes.extend(metric.entrez_gene_ids)
-        top_genes.extend(metric.high_confidence_entrez_gene_ids)
-
-    expression_frequency = pd.Series(expressed_genes).value_counts()
-    expression_df = pd.DataFrame(
-        {"entrez_gene_id": expression_frequency.index, "frequency": expression_frequency.values}
-    )
-    expression_df["prop"] = expression_df["frequency"] / len(metrics)
-    expression_df = expression_df[expression_df["prop"] >= filtering_options.batch_ratio]
-
-    top_frequency = pd.Series(top_genes).value_counts()
-    top_df = pd.DataFrame({"entrez_gene_id": top_frequency.index, "frequency": top_frequency.values})
-    top_df["prop"] = top_df["frequency"] / len(metrics)
-    top_df = top_df[top_df["prop"] >= filtering_options.high_batch_ratio]
-
-    boolean_matrix = pd.DataFrame(data={"entrez_gene_id": entrez_gene_ids, "expressed": 0, "high": 0})
-    for gene in entrez_gene_ids:
-        if gene in expression_df["entrez_gene_id"]:
-            boolean_matrix.loc[gene, "expressed"] = 1
-        if gene in top_df["entrez_gene_id"]:
-            boolean_matrix.loc[gene, "high"] = 1
-
-    expressed_count = len(boolean_matrix[boolean_matrix["expressed"] == 1])
-    high_confidence_count = len(boolean_matrix[boolean_matrix["high"] == 1])
-
-    boolean_matrix.to_csv(output_filepath, index=False)
-    logger.info(
-        f"{context_name} - Found {expressed_count} expressed genes, "
-        f"{high_confidence_count} of which are confidently expressed"
-    )
-    logger.success(f"Wrote boolean matrix to {output_filepath}")
diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py
index 47aeb8ee..364bd4c3 100644
--- a/main/como/rnaseq_gen.py
+++ b/main/como/rnaseq_gen.py
@@ -1,130 +1,723 @@
 from __future__ import annotations
 
-import argparse
-import asyncio
-from dataclasses import dataclass
+import math
+import multiprocessing
+import time
+from collections import namedtuple
+from dataclasses import dataclass, field
+from enum import Enum
+from functools import partial
+from multiprocessing.pool import Pool
+from pathlib import Path
+from typing import Callable, NamedTuple
 
+import numpy as np
+import numpy.typing as npt
 import pandas as pd
-from fast_bioservices import Taxon
+import plotly.graph_objs as go
+import scanpy as sc
+import sklearn
+import sklearn.neighbors
+from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol
 from loguru import logger
+from pandas import DataFrame
+from plotly.subplots import make_subplots
+from scipy.signal import find_peaks
+from sklearn.neighbors import KernelDensity
 
-from como import Config
-from como.custom_types import RNAPrepMethod
-from como.rnaseq import FilteringTechnique, save_rnaseq_tests
+from como.migrations import gene_info_migrations
+from como.project import Config
+from como.types import FilteringTechnique, RNAPrepMethod
 
 
-@dataclass
-class _Arguments:
-    config_file: str
+class _FilteringOptions(NamedTuple):
     replicate_ratio: float
     batch_ratio: float
+    cut_off: float
     high_replicate_ratio: float
     high_batch_ratio: float
-    filtering_technique: FilteringTechnique
-    minimum_cutoff: int | str
-    library_prep: RNAPrepMethod
-    taxon: Taxon
+
+
+class LayoutMethod(Enum):
+    """RNA sequencing layout method."""
+
+    paired_end = "paired-end"
+    single_end = "single-end"
+
+
+@dataclass
+class _StudyMetrics:
+    study: str
+    num_samples: int
+    count_matrix: pd.DataFrame
+    fragment_lengths: npt.NDArray[np.float32]
+    sample_names: list[str]
+    layout: list[LayoutMethod]
+    entrez_gene_ids: list[str]
+    gene_sizes: npt.NDArray[np.float32]
+    __normalization_matrix: pd.DataFrame = field(default_factory=pd.DataFrame)
+    __z_score_matrix: pd.DataFrame = field(default_factory=pd.DataFrame)
+    __high_confidence_entrez_gene_ids: list[str] = field(default=list)
 
     def __post_init__(self):
-        self.library_prep = RNAPrepMethod.from_string(str(self.library_prep))
-        self.filtering_technique = FilteringTechnique.from_string(str(self.filtering_technique))
+        for layout in self.layout:
+            if layout not in LayoutMethod:
+                raise ValueError(f"Layout must be 'paired-end' or 'single-end'; got: {layout}")
 
-        if self.minimum_cutoff is None:
-            if self.filtering_technique == FilteringTechnique.tpm:
-                self.minimum_cutoff = 25
-            elif self.filtering_technique == FilteringTechnique.cpm:
-                self.minimum_cutoff = "default"
-            elif self.filtering_technique == FilteringTechnique.zfpkm:
-                self.minimum_cutoff = -3
+    @property
+    def normalization_matrix(self) -> pd.DataFrame:
+        return self.__normalization_matrix
 
+    @normalization_matrix.setter
+    def normalization_matrix(self, value: pd.DataFrame) -> None:
+        self.__normalization_matrix = value
 
-async def _handle_context_batch(
-    config_filename: str,
-    replicate_ratio: float,
-    batch_ratio: float,
-    replicate_ratio_high: float,
-    batch_ratio_high: float,
-    technique: FilteringTechnique,
-    cut_off: int | float | str,
-    prep: RNAPrepMethod,
-    taxon: Taxon,
-) -> None:
-    """Iterate through each context type and create rnaseq expression file.
+    @property
+    def z_score_matrix(self) -> pd.DataFrame:
+        return self.__z_score_matrix
 
-    :param config_filename: The configuration filename to read
-    :param replicate_ratio: The percentage of replicates that a gene must
-        appear in for a gene to be marked as "active" in a batch/study
-    :param batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked as 'active"
-    :param replicate_ratio_high: The percentage of replicates that a gene must
-        appear in for a gene to be marked "highly confident" in its expression in a batch/study
-    :param batch_ratio_high: The percentage of batches that a gene must
-        appear in for a gene to be marked "highly confident" in its expression
-    :param technique: The filtering technique to use
-    :param cut_off: The cutoff value to use for the provided filtering technique
-    :param prep: The library preparation method
+    @z_score_matrix.setter
+    def z_score_matrix(self, value: pd.DataFrame) -> None:
+        self.__z_score_matrix = value
+
+    @property
+    def high_confidence_entrez_gene_ids(self) -> list[str]:
+        return self.__high_confidence_entrez_gene_ids
+
+    @high_confidence_entrez_gene_ids.setter
+    def high_confidence_entrez_gene_ids(self, values: list[str]) -> None:
+        self.__high_confidence_entrez_gene_ids = values
+
+
+class _ZFPKMResult(NamedTuple):
+    zfpkm: pd.Series
+    density: Density
+    mu: float
+    std_dev: float
+    max_fpkm: float
+
+
+class _ReadMatrixResults(NamedTuple):
+    metrics: dict[str, _StudyMetrics]
+    entrez_gene_ids: list[str]
+
+
+Density = namedtuple("Density", ["x", "y"])
+NamedMetrics = dict[str, _StudyMetrics]
+
+
+def k_over_a(k: int, a: float) -> Callable[[npt.NDArray], bool]:
+    """Return a function that filters rows of an array based on the sum of elements being greater than or equal to A at least k times.
+
+    This code is based on the `kOverA` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/kOverA
+
+    :param k: The minimum number of times the sum of elements must be greater than or equal to A.
+    :param a: The value to compare the sum of elements to.
+    :return: A function that accepts a NumPy array to perform the actual filtering
+    """  # noqa: E501
+
+    def filter_func(row: npt.NDArray) -> bool:
+        return np.sum(row >= a) >= k
+
+    return filter_func
+
+
+def genefilter(data: pd.DataFrame | npt.NDArray, filter_func: Callable[[npt.NDArray], bool]) -> npt.NDArray:
+    """Apply a filter function to the rows of the data and return the filtered array.
+
+    This code is based on the `genefilter` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/genefilter
+
+    :param data: The data to filter
+    :param filter_func: THe function to filter the data by
+    :return: A NumPy array of the filtered data.
+    """
+    if not isinstance(data, (pd.DataFrame, npt.NDArray)):
+        raise TypeError("Unsupported data type. Must be a Pandas DataFrame or a NumPy array.")
+
+    return (
+        data.apply(filter_func, axis=1).values
+        if isinstance(data, pd.DataFrame)
+        else np.apply_along_axis(filter_func, axis=1, arr=data)
+    )
+
+
+async def _read_counts(path: Path) -> pd.DataFrame:
+    if path.suffix not in {".csv", ".h5ad"}:
+        raise ValueError(f"Unknown file extension '{path.suffix}'. Valid options are '.csv' or '.h5ad'.")
+
+    matrix: pd.DataFrame
+    if path.suffix == ".csv":
+        logger.debug(f"Reading CSV file at '{path}'")
+        matrix = pd.read_csv(path, header=0)
+    elif path.suffix == ".h5ad":
+        logger.debug(f"Reading h5ad file at '{path}'")
+        # Make sample names the columns and gene data the index
+        matrix = sc.read_h5ad(path).to_df().T
+
+    return matrix
+
+
+async def _build_matrix_results(
+    *,
+    matrix: pd.DataFrame,
+    gene_info: pd.DataFrame,
+    metadata_df: pd.DataFrame,
+    taxon: int,
+) -> _ReadMatrixResults:
+    """Read the counts matrix and returns the results.
+
+    :param matrix: The gene counts matrix to process
+    :param metadata_df: The configuration dataframe related to the current context
     :param taxon: The NCBI Taxon ID
-    :return: None
+    :return: A dataclass `ReadMatrixResults`
     """
-    config = Config()
+    gene_info = gene_info_migrations(gene_info)
+    conversion = await ensembl_to_gene_id_and_symbol(ids=matrix["ensembl_gene_id"].tolist(), taxon=taxon)
+    matrix = matrix.merge(conversion, on="ensembl_gene_id", how="left")
 
-    config_filepath = config.config_dir / config_filename
-    if not config_filepath.exists():
-        raise FileNotFoundError(f"Unable to find '{config_filename}' at the path: '{config_filepath}'")
-    xl = pd.ExcelFile(config_filepath)
-    sheet_names = xl.sheet_names
+    # Only include Entrez and Ensembl Gene IDs that are present in `gene_info`
+    matrix["entrez_gene_id"] = matrix["entrez_gene_id"].str.split("//")
+    matrix = matrix.explode("entrez_gene_id")
+    matrix = matrix.replace(to_replace="-", value=pd.NA).dropna()
+    matrix["entrez_gene_id"] = matrix["entrez_gene_id"].astype(int)
 
-    logger.info(f"Reading config file: {config_filepath}")
+    gene_info = gene_info.replace(to_replace="-", value=pd.NA).dropna()
+    gene_info["entrez_gene_id"] = gene_info["entrez_gene_id"].astype(int)
 
-    for context_name in sheet_names:
-        logger.debug(f"Starting '{context_name}'")
+    counts_matrix = matrix.merge(
+        gene_info[["entrez_gene_id", "ensembl_gene_id"]],
+        on=["entrez_gene_id", "ensembl_gene_id"],
+        how="inner",
+    )
+    gene_info = gene_info.merge(
+        counts_matrix[["entrez_gene_id", "ensembl_gene_id"]],
+        on=["entrez_gene_id", "ensembl_gene_id"],
+        how="inner",
+    )
 
-        rnaseq_input_filepath = (
-            config.data_dir / "data_matrices" / context_name / f"gene_counts_matrix_{prep.value}_{context_name}"
+    entrez_gene_ids: list[str] = gene_info["entrez_gene_id"].tolist()
+    metrics: NamedMetrics = {}
+    for study in metadata_df["study"].unique().tolist():
+        study_sample_names = metadata_df[metadata_df["study"] == study]["sample_name"].tolist()
+        layouts = metadata_df[metadata_df["study"] == study]["layout"].tolist()
+        metrics[study] = _StudyMetrics(
+            count_matrix=counts_matrix[counts_matrix.columns.intersection(study_sample_names)],
+            fragment_lengths=metadata_df[metadata_df["study"] == study]["fragment_length"].values,
+            sample_names=study_sample_names,
+            layout=[LayoutMethod(layout) for layout in layouts],
+            num_samples=len(study_sample_names),
+            entrez_gene_ids=entrez_gene_ids,
+            gene_sizes=np.array(gene_info["size"].values).astype(np.float32),
+            study=study,
         )
-        if prep == RNAPrepMethod.SCRNA:
-            rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".h5ad")
-        elif prep in {RNAPrepMethod.TOTAL, RNAPrepMethod.MRNA}:
-            rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".csv")
-
-        if not rnaseq_input_filepath.exists():
-            logger.warning(f"Gene counts matrix not found at {rnaseq_input_filepath}, skipping...")
-            continue
-
-        gene_info_filepath = config.data_dir / "gene_info.csv"
-        rnaseq_output_filepath = (
-            config.result_dir / context_name / prep.value / f"rnaseq_{prep.value}_{context_name}.csv"
+        metrics[study].fragment_lengths[np.isnan(metrics[study].fragment_lengths)] = 0
+        metrics[study].count_matrix.index = pd.Index(entrez_gene_ids, name="entrez_gene_id")
+
+    return _ReadMatrixResults(metrics=metrics, entrez_gene_ids=gene_info["entrez_gene_id"].tolist())
+
+
+def calculate_tpm(metrics: NamedMetrics) -> NamedMetrics:
+    """Calculate the Transcripts Per Million (TPM) for each sample in the metrics dictionary."""
+    for sample in metrics:
+        count_matrix = metrics[sample].count_matrix
+
+        gene_sizes = metrics[sample].gene_sizes
+
+        tpm_matrix = pd.DataFrame(data=None, index=count_matrix.index, columns=count_matrix.columns)
+        for i in range(len(count_matrix.columns)):
+            values: pd.Series = count_matrix.iloc[:, i] + 1  # Add 1 to prevent division by 0
+            rate = np.log(values.tolist()) - np.log(gene_sizes)
+            denominator = np.log(np.sum(np.exp(rate)))
+            tpm_value = np.exp(rate - denominator + np.log(1e6))
+            tpm_matrix.iloc[:, i] = tpm_value
+        metrics[sample].normalization_matrix = tpm_matrix
+
+    return metrics
+
+
+def calculate_fpkm(metrics: NamedMetrics) -> NamedMetrics:
+    """Calculate the Fragments Per Kilobase of transcript per Million mapped reads (FPKM) for each sample in the metrics dictionary."""  # noqa: E501
+    matrix_values = []
+    for study in metrics:
+        for sample in range(metrics[study].num_samples):
+            layout = metrics[study].layout[sample]
+            count_matrix: npt.NDArray = metrics[study].count_matrix.iloc[:, sample].values
+            gene_size = metrics[study].gene_sizes
+
+            count_matrix = count_matrix.astype(np.float32)
+            gene_size = gene_size.astype(np.float32)
+
+            match layout:
+                case LayoutMethod.paired_end:  # FPKM
+                    mean_fragment_lengths = metrics[study].fragment_lengths[sample]
+                    # Ensure non-negative value
+                    effective_length = [max(0, size - (mean_fragment_lengths + 1)) for size in gene_size]
+                    n = count_matrix.sum()
+                    fpkm = ((count_matrix + 1) * 1e9) / (np.array(effective_length) * n)
+                    matrix_values.append(fpkm)
+                case LayoutMethod.single_end:  # RPKM
+                    # Add a pseudocount before log to ensure log(0) does not happen
+                    rate = np.log(count_matrix + 1) - np.log(gene_size)
+                    exp_rate = np.exp(rate - np.log(np.sum(count_matrix)) + np.log(1e9))
+                    matrix_values.append(exp_rate)
+                case _:
+                    raise ValueError("Invalid normalization method specified")
+
+        fpkm_matrix = pd.DataFrame(matrix_values).T  # Transpose is needed because values were appended as rows
+        fpkm_matrix = fpkm_matrix[~pd.isna(fpkm_matrix)]
+        metrics[study].normalization_matrix = fpkm_matrix
+
+        metrics[study].normalization_matrix.columns = metrics[study].count_matrix.columns
+
+    return metrics
+
+
+def _zfpkm_calculation(col: pd.Series, kernel: KernelDensity, peak_parameters: tuple[float, float]) -> _ZFPKMResult:
+    """Log2 Transformations.
+
+    Stabilize the variance in the data to make the distribution more symmetric; this is helpful for Gaussian fitting
+
+    Kernel Density Estimation (kde)
+        - Non-parametric method to estimate the probability density function (PDF) of a random variable
+        - Estimates the distribution of log2-transformed FPKM values
+        - Bandwidth parameter controls the smoothness of the density estimate
+        - KDE Explanation
+            - A way to smooth a histogram to get a better idea of the underlying distribution of the data
+            - Given a set of data points, we want to understand how they are distributed.
+                Histograms can be useful, but are sensitive to bin size and number
+            - The KDE places a "kernel" - a small symmetric function (i.e., Gaussian curve) - at each data point
+            - The "kernel" acts as a weight, giving more weight to points closer to the center of the kernel,
+                and less weight to points farther away
+            - Kernel functions are summed along each point on the x-axis
+            - A smooth curve is created that represents the estimated density of the data
+
+    Peak Finding
+        - Identifies that are above a certain height and separated by a minimum distance
+        - Represent potential local maxima in the distribution
+
+    Peak Selection
+        - The peak with the highest x-value (from log2-FPKM) is chosen as the mean (mu)
+            of the "inactive" gene distribution
+        - The peak representing unexpressed or inactive genes should be at a lower FPKM
+            value compared to the peak representing expressed genes
+
+    Standard Deviation Estimation
+         - The mean of log2-FPKM values are greater than the calculated mu
+         - Standard deviation is estimated based on the assumption that the right tail of the distribution
+            This represents expressed genes) can be approximated by a half-normal distribution
+
+     zFPKM Transformation
+        - Centers disbribution around 0 and scales it by the standard deviation.
+            This makes it easier to compare gene expression across different samples
+        - Represents the number of standard deviations away from the mean of the "inactive" gene distribution
+        - Higher zFPKM values indicate higher expression levels relative to the "inactive" peak
+        - A zFPKM value of 0 represents the mean of the "inactive" distribution
+        - Research shows that a zFPKM value of -3 or greater can be used as
+            a threshold for calling a gene as "expressed"
+            : https://doi.org/10.1186/1471-2164-14-778
+    """
+    col_log2: npt.NDArray = np.log2(col + 1)
+    col_log2 = np.nan_to_num(col_log2, nan=0)
+    refit: KernelDensity = kernel.fit(col_log2.reshape(-1, 1))  # type: ignore
+
+    # kde: KernelDensity = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(col_log2.reshape(-1, 1))
+    x_range = np.linspace(col_log2.min(), col_log2.max(), 1000)
+    density = np.exp(refit.score_samples(x_range.reshape(-1, 1)))
+    peaks, _ = find_peaks(density, height=peak_parameters[0], distance=peak_parameters[1])
+    peak_positions = x_range[peaks]
+
+    mu = 0
+    max_fpkm = 0
+    stddev = 1
+
+    if len(peaks) != 0:
+        mu = peak_positions.max()
+        max_fpkm = density[peaks[np.argmax(peak_positions)]]
+        u = col_log2[col_log2 > mu].mean()
+        stddev = (u - mu) * np.sqrt(np.pi / 2)
+    zfpkm = pd.Series((col_log2 - mu) / stddev, dtype=np.float32, name=col.name)
+
+    return _ZFPKMResult(zfpkm=zfpkm, density=Density(x_range, density), mu=mu, std_dev=stddev, max_fpkm=max_fpkm)
+
+
+def zfpkm_transform(
+    fpkm_df: pd.DataFrame,
+    bandwidth: int = 0.5,
+    peak_parameters: tuple[float, float] = (0.02, 1.0),
+    update_every_percent: float = 0.1,
+) -> tuple[dict[str, _ZFPKMResult], DataFrame]:
+    """Perform zFPKM calculation/transformation."""
+    if update_every_percent > 1:
+        logger.warning(
+            f"update_every_percent should be a decimal value between 0 and 1; got: {update_every_percent} - "
+            f"will convert to percentage"
         )
-        rnaseq_output_filepath.parent.mkdir(parents=True, exist_ok=True)
-
-        await save_rnaseq_tests(
-            context_name=context_name,
-            counts_matrix_filepath=rnaseq_input_filepath,
-            config_filepath=config_filepath,
-            output_filepath=rnaseq_output_filepath.as_posix(),
-            gene_info_filepath=gene_info_filepath,
-            prep=prep,
-            replicate_ratio=replicate_ratio,
-            batch_ratio=batch_ratio,
-            high_replicate_ratio=replicate_ratio_high,
-            high_batch_ratio=batch_ratio_high,
-            technique=technique,
-            cut_off=cut_off,
-            taxon_id=taxon,
+        update_every_percent /= 100
+
+    total = len(fpkm_df.columns)
+    update_per_step: int = int(np.ceil(total * update_every_percent))
+    cores = min(multiprocessing.cpu_count() - 2, total)
+    logger.debug(f"Processing {total:,} samples through zFPKM transform using {cores} cores")
+    logger.debug(
+        f"Will update every {update_per_step:,} steps as this is approximately "
+        f"{update_every_percent:.1%} of {total:,}"
+    )
+
+    with Pool(processes=cores) as pool:
+        kernel = KernelDensity(kernel="gaussian", bandwidth=bandwidth)
+        chunksize = int(math.ceil(len(fpkm_df.columns) / (4 * cores)))
+        partial_func = partial(_zfpkm_calculation, kernel=kernel, peak_parameters=peak_parameters)
+        chunk_time = time.time()
+        start_time = time.time()
+
+        log_padding = len(str(f"{total:,}"))
+        zfpkm_df = pd.DataFrame(data=0, index=fpkm_df.index, columns=fpkm_df.columns)
+        results: dict[str, _ZFPKMResult] = {}
+        result: _ZFPKMResult
+        for i, result in enumerate(
+            pool.imap(
+                partial_func,
+                (fpkm_df[col] for col in fpkm_df.columns),
+                chunksize=chunksize,
+            )
+        ):
+            key = str(result.zfpkm.name)
+            results[key] = result
+            zfpkm_df[key] = result.zfpkm
+
+            # show updates every X% and at the end, but skip on first iteration
+            if i != 0 and (i % update_per_step == 0 or i == total):
+                current_time = time.time()
+                chunk = current_time - chunk_time
+                total_time = current_time - start_time
+                formatted = f"{i:,}"
+                logger.debug(
+                    f"Processed {formatted:>{log_padding}} of {total:,} - "
+                    f"chunk took {chunk:.1f} seconds - "
+                    f"running for {total_time:.1f} seconds"
+                )
+                chunk_time = current_time
+    return results, zfpkm_df
+
+
+def zfpkm_plot(results, *, plot_xfloor: int = -4, subplot_titles: bool = True):
+    """Plot the log2(FPKM) density and fitted Gaussian for each sample.
+
+    :param results: A dictionary of intermediate results from zfpkm_transform.
+    :param: subplot_titles: Whether to display facet titles (sample names).
+    :param plot_xfloor: Lower limit for the x-axis.
+    :param subplot_titles: Whether to display facet titles (sample names).
+    """
+    mega_df = pd.DataFrame(columns=["sample_name", "log2fpkm", "fpkm_density", "fitted_density_scaled"])
+    for name, result in results.items():
+        stddev = result.std_dev
+        x = np.array(result.density.x)
+        y = np.array(result.density.y)
+
+        fitted = np.exp(-0.5 * ((x - result.mu) / stddev) ** 2) / (stddev * np.sqrt(2 * np.pi))
+        max_fpkm = y.max()
+        max_fitted = fitted.max()
+        scale_fitted = fitted * (max_fpkm / max_fitted)
+
+        df = pd.DataFrame(
+            {
+                "sample_name": [name] * len(x),
+                "log2fpkm": x,
+                "fpkm_density": y,
+                "fitted_density_scaled": scale_fitted,
+            }
         )
-        logger.success(f"Results saved at '{rnaseq_output_filepath}'")
+        mega_df = pd.concat([mega_df, df], ignore_index=True)
+
+    mega_df = mega_df.melt(id_vars=["log2fpkm", "sample_name"], var_name="source", value_name="density")
+    subplot_titles = list(results.keys()) if subplot_titles else None
+    fig = make_subplots(
+        rows=len(results),
+        cols=1,
+        subplot_titles=subplot_titles,
+        vertical_spacing=min(0.05, (1 / (len(results) - 1))),
+    )
+
+    for i, (name, group) in enumerate(mega_df.groupby("sample_name"), start=1):
+        fig.add_trace(
+            trace=go.Scatter(x=group["log2fpkm"], y=group["density"], mode="lines", name=name, legendgroup=name),
+            row=i,
+            col=1,
+        )
+        fig.update_xaxes(title_text="log2(FPKM)", range=[plot_xfloor, max(group["log2fpkm"].tolist())], row=i, col=1)
+        fig.update_yaxes(title_text="density [scaled]", row=i, col=1)
+        fig.update_layout(legend_tracegroupgap=0)
+
+    fig.update_layout(height=600 * len(results), width=1000, title_text="zFPKM Plots", showlegend=True)
+    fig.write_image("zfpkm_plot.png")
 
 
-async def rnaseq_gen(
-    # config_filepath: Path,
-    config_filename: str,
+def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics:
+    """Calculate the z-score for each sample in the metrics dictionary."""
+    for sample in metrics:
+        log_matrix = np.log(metrics[sample].normalization_matrix)
+        z_matrix = pd.DataFrame(
+            data=sklearn.preprocessing.scale(log_matrix, axis=1), columns=metrics[sample].sample_names
+        )
+        metrics[sample].z_score_matrix = z_matrix
+    return metrics
+
+
+def cpm_filter(
+    *,
+    context_name: str,
+    metrics: NamedMetrics,
+    filtering_options: _FilteringOptions,
     prep: RNAPrepMethod,
-    taxon_id: int | str | Taxon,
+) -> NamedMetrics:
+    """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample."""
+    config = Config()
+    n_exp = filtering_options.replicate_ratio
+    n_top = filtering_options.high_replicate_ratio
+    cut_off = filtering_options.cut_off
+
+    sample: str
+    metric: _StudyMetrics
+    for sample, metric in metrics.items():
+        counts: pd.DataFrame = metric.count_matrix
+        entrez_ids: list[str] = metric.entrez_gene_ids
+        library_size: pd.DataFrame = counts.sum(axis=1)
+
+        # For library_sizes equal to 0, add 1 to prevent divide by 0
+        # This will not impact the final counts per million calculation because the original counts are still 0
+        #   thus, (0 / 1) * 1_000_000 = 0
+        library_size[library_size == 0] = 1
+
+        output_filepath = config.result_dir / context_name / prep.value / f"CPM_Matrix_{prep.value}_{sample}.csv"
+        output_filepath.parent.mkdir(parents=True, exist_ok=True)
+        counts_per_million: pd.DataFrame = (counts / library_size) * 1_000_000
+        counts_per_million.insert(0, "entrez_gene_ids", pd.Series(entrez_ids))
+        logger.debug(f"Writing CPM matrix to {output_filepath}")
+        counts_per_million.to_csv(output_filepath, index=False)
+
+        # TODO: Counts per million is adding ~61,500 columns (equal to the number of genes) for some reason.
+        #  Most likely due to multiplying by 1_000_000, not exactly sure why
+
+        min_samples = round(n_exp * len(counts.columns))  # noqa: F841
+        top_samples = round(n_top * len(counts.columns))  # noqa: F841
+        test_bools = pd.DataFrame({"entrez_gene_ids": entrez_ids})
+        for i in range(len(counts_per_million.columns)):
+            cutoff = (
+                10e6 / (np.median(np.sum(counts[:, i])))
+                if cut_off == "default"
+                else (1e6 * cut_off) / np.median(np.sum(counts[:, i]))
+            )
+            test_bools = test_bools.merge(counts_per_million[counts_per_million.iloc[:, i] > cutoff])
+
+    return metrics
+
+
+def tpm_quantile_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions) -> NamedMetrics:
+    """Apply quantile-based filtering to the TPM matrix for a given sample."""
+    # TODO: Write the TPM matrix to disk
+
+    n_exp = filtering_options.replicate_ratio
+    n_top = filtering_options.high_replicate_ratio
+    cut_off = filtering_options.cut_off
+    metrics = calculate_tpm(metrics)
+
+    sample: str
+    metric: _StudyMetrics
+    for sample, metric in metrics.items():
+        entrez_ids = metric.entrez_gene_ids
+        gene_size = metric.gene_sizes
+        tpm_matrix: pd.DataFrame = metric.normalization_matrix
+
+        min_samples = round(n_exp * len(tpm_matrix.columns))
+        top_samples = round(n_top * len(tpm_matrix.columns))
+
+        tpm_quantile = tpm_matrix[tpm_matrix > 0]
+        quantile_cutoff = np.quantile(
+            a=tpm_quantile.values, q=1 - (cut_off / 100), axis=0
+        )  # Compute quantile across columns
+        boolean_expression = pd.DataFrame(
+            data=tpm_matrix > quantile_cutoff, index=tpm_matrix.index, columns=tpm_matrix.columns
+        ).astype(int)
+
+        min_func = k_over_a(min_samples, 0.9)
+        top_func = k_over_a(top_samples, 0.9)
+
+        min_genes: npt.NDArray[bool] = genefilter(boolean_expression, min_func)
+        top_genes: npt.NDArray[bool] = genefilter(boolean_expression, top_func)
+
+        # Only keep `entrez_gene_ids` that pass `min_genes`
+        metric.entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, min_genes) if keep]
+        metric.gene_sizes = [gene for gene, keep in zip(gene_size, min_genes) if keep]
+        metric.count_matrix = metric.count_matrix.iloc[min_genes, :]
+        metric.normalization_matrix = metrics[sample].normalization_matrix.iloc[min_genes, :]
+
+        keep_top_genes = [gene for gene, keep in zip(entrez_ids, top_genes) if keep]
+        metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, keep_top_genes) if keep]
+
+    metrics = calculate_z_score(metrics)
+
+    return metrics
+
+
+def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics:
+    """Apply zFPKM filtering to the FPKM matrix for a given sample."""
+    min_sample_expression = filtering_options.replicate_ratio
+    high_confidence_sample_expression = filtering_options.high_replicate_ratio
+    cut_off = filtering_options.cut_off
+
+    if calcualte_fpkm:
+        metrics = calculate_fpkm(metrics)
+
+    metric: _StudyMetrics
+    for metric in metrics.values():
+        # if fpkm was not calculated, the normalization matrix will be empty; collect the count matrix instead
+        matrix = metric.count_matrix if metric.normalization_matrix.empty else metric.normalization_matrix
+        matrix = matrix[matrix.sum(axis=1) > 0]
+
+        minimums = matrix == 0
+        results, zfpkm_df = zfpkm_transform(matrix)
+        zfpkm_df[minimums] = -4
+        zfpkm_plot(results)
+
+        # determine which genes are expressed
+        min_samples = round(min_sample_expression * len(zfpkm_df.columns))
+        min_func = k_over_a(min_samples, cut_off)
+        min_genes: npt.NDArray[bool] = genefilter(zfpkm_df, min_func)
+        metric.entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, min_genes) if keep]
+
+        # determine which genes are confidently expressed
+        top_samples = round(high_confidence_sample_expression * len(zfpkm_df.columns))
+        top_func = k_over_a(top_samples, cut_off)
+        top_genes: npt.NDArray[bool] = genefilter(zfpkm_df, top_func)
+        metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, top_genes) if keep]
+
+    return metrics
+
+
+def filter_counts(
+    *,
+    context_name: str,
+    metrics: NamedMetrics,
+    technique: FilteringTechnique,
+    filtering_options: _FilteringOptions,
+    prep: RNAPrepMethod,
+) -> NamedMetrics:
+    """Filter the count matrix based on the specified technique."""
+    match technique:
+        case FilteringTechnique.cpm:
+            return cpm_filter(
+                context_name=context_name, metrics=metrics, filtering_options=filtering_options, prep=prep
+            )
+        case FilteringTechnique.tpm:
+            return tpm_quantile_filter(metrics=metrics, filtering_options=filtering_options)
+        case FilteringTechnique.zfpkm:
+            return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=True)
+        case FilteringTechnique.umi:
+            return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=False)
+        case _:
+            raise ValueError(f"Technique must be one of {FilteringTechnique}")
+
+
+async def _save_rnaseq_tests(
+    context_name: str,
+    rnaseq_matrix: pd.DataFrame,
+    metadata_df: pd.DataFrame,
+    gene_info_df: pd.DataFrame,
+    output_filepath: Path,
+    prep: RNAPrepMethod,
+    taxon: int,
+    replicate_ratio: float,
+    batch_ratio: float,
+    high_replicate_ratio: float,
+    high_batch_ratio: float,
+    technique: FilteringTechnique,
+    cut_off: int | float,
+):
+    """Save the results of the RNA-Seq tests to a CSV file."""
+    filtering_options = _FilteringOptions(
+        replicate_ratio=replicate_ratio,
+        batch_ratio=batch_ratio,
+        cut_off=float(cut_off),
+        high_replicate_ratio=high_replicate_ratio,
+        high_batch_ratio=high_batch_ratio,
+    )
+
+    read_counts_results: _ReadMatrixResults = await _build_matrix_results(
+        matrix=rnaseq_matrix,
+        gene_info=gene_info_df,
+        metadata_df=metadata_df,
+        taxon=taxon,
+    )
+    metrics = read_counts_results.metrics
+    entrez_gene_ids = read_counts_results.entrez_gene_ids
+
+    metrics = filter_counts(
+        context_name=context_name,
+        metrics=metrics,
+        technique=technique,
+        filtering_options=filtering_options,
+        prep=prep,
+    )
+
+    expressed_genes: list[str] = []
+    top_genes: list[str] = []
+    for metric in metrics.values():
+        expressed_genes.extend(metric.entrez_gene_ids)
+        top_genes.extend(metric.high_confidence_entrez_gene_ids)
+
+    expression_frequency = pd.Series(expressed_genes).value_counts()
+    expression_df = pd.DataFrame(
+        {"entrez_gene_id": expression_frequency.index, "frequency": expression_frequency.values}
+    )
+    expression_df["prop"] = expression_df["frequency"] / len(metrics)
+    expression_df = expression_df[expression_df["prop"] >= filtering_options.batch_ratio]
+
+    top_frequency = pd.Series(top_genes).value_counts()
+    top_df = pd.DataFrame({"entrez_gene_id": top_frequency.index, "frequency": top_frequency.values})
+    top_df["prop"] = top_df["frequency"] / len(metrics)
+    top_df = top_df[top_df["prop"] >= filtering_options.high_batch_ratio]
+
+    boolean_matrix = pd.DataFrame(data={"entrez_gene_id": entrez_gene_ids, "expressed": 0, "high": 0})
+    for gene in entrez_gene_ids:
+        if gene in expression_df["entrez_gene_id"]:
+            boolean_matrix.loc[gene, "expressed"] = 1
+        if gene in top_df["entrez_gene_id"]:
+            boolean_matrix.loc[gene, "high"] = 1
+
+    expressed_count = len(boolean_matrix[boolean_matrix["expressed"] == 1])
+    high_confidence_count = len(boolean_matrix[boolean_matrix["high"] == 1])
+
+    boolean_matrix.to_csv(output_filepath, index=False)
+    logger.info(
+        f"{context_name} - Found {expressed_count} expressed and {high_confidence_count} confidently expressed genes"
+    )
+    logger.success(f"Wrote boolean matrix to {output_filepath}")
+
+
+async def _create_metadata_df(path: Path) -> pd.DataFrame:
+    if path.suffix not in {".xls", ".xlsx"}:
+        raise ValueError(
+            f"Expected an excel file with extension of '.xlsx' or '.xls', got '{path.suffix}'. "
+            f"Attempted to process: {path}"
+        )
+    return pd.read_excel(path)
+
+
+async def rnaseq_gen(  # noqa: C901, allow complex function
+    context_name: str,
+    input_rnaseq_filepath: Path,
+    input_gene_info_filepath: Path,
+    output_rnaseq_filepath: Path,
+    prep: RNAPrepMethod,
+    taxon: int,
+    input_metadata_filepath: Path | None = None,
+    input_metadata_df: pd.DataFrame | None = None,
     replicate_ratio: float = 0.5,
     high_replicate_ratio: float = 1.0,
     batch_ratio: float = 0.5,
     high_batch_ratio: float = 1.0,
     technique: FilteringTechnique | str = FilteringTechnique.tpm,
-    cut_off: int | float | None = None,
+    cutoff: int | float | None = None,
 ) -> None:
     """Generate a list of active and high-confidence genes from a gene count matrix.
 
@@ -132,9 +725,14 @@ async def rnaseq_gen(
         then study/batch numbers are checked for consensus according to batch ratios.
     The zFPKM method is outlined here: https://pubmed.ncbi.nlm.nih.gov/24215113/
 
-    :param config_filename: The configuration filename to read
+    :param context_name: The name of the context being processed
+    :param input_rnaseq_filepath: The filepath to the gene count matrix
+    :param input_gene_info_filepath: The filepath to the gene info file
+    :param output_rnaseq_filepath: The filepath to write the output gene count matrix
     :param prep: The preparation method
-    :param taxon_id: The NCBI Taxon ID
+    :param taxon: The NCBI Taxon ID
+    :param input_metadata_filepath: The filepath to the metadata file
+    :param input_metadata_df: The metadata dataframe
     :param replicate_ratio: The percentage of replicates that a gene must
         appear in for a gene to be marked as "active" in a batch/study
     :param batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked as 'active"
@@ -143,168 +741,61 @@ async def rnaseq_gen(
     :param high_batch_ratio: The percentage of batches that a gene must
         appear in for a gene to be marked "highly confident" in its expression
     :param technique: The filtering technique to use
-    :param cut_off: The cutoff value to use for the provided filtering technique
+    :param cutoff: The cutoff value to use for the provided filtering technique
     :return: None
     """
-    if isinstance(technique, str):
-        technique = FilteringTechnique(technique.lower())
-    if isinstance(taxon_id, (str, int)):
-        taxon_id = Taxon.from_string(str(taxon_id))
+    if not input_metadata_df and not input_metadata_filepath:
+        raise ValueError("At least one of input_metadata_filepath or input_metadata_df must be provided")
+
+    technique = (
+        FilteringTechnique.from_string(str(technique.lower())) if isinstance(technique, (str, int)) else technique
+    )
 
     match technique:
         case FilteringTechnique.tpm:
-            cut_off = 25 if cut_off is None else cut_off
-            if cut_off < 1 or cut_off > 100:
+            cutoff = cutoff or 25
+            if cutoff < 1 or cutoff > 100:
                 raise ValueError("Quantile must be between 1 - 100")
 
         case FilteringTechnique.cpm:
-            if cut_off is not None and cut_off < 0:
+            if cutoff and cutoff < 0:
                 raise ValueError("Cutoff must be greater than 0")
-            elif cut_off is None:
-                cut_off = "default"
+            elif cutoff:
+                cutoff = "default"
 
         case FilteringTechnique.zfpkm:
-            cut_off = "default" if cut_off is None else cut_off
+            cutoff = cutoff or -3
         case FilteringTechnique.umi:
             pass
         case _:
             raise ValueError(f"Technique must be one of {FilteringTechnique}")
 
-    await _handle_context_batch(
-        config_filename=config_filename,
+    if not input_rnaseq_filepath.exists():
+        raise FileNotFoundError(f"Input RNA-seq file not found! Searching for: '{input_rnaseq_filepath}'")
+
+    if prep == RNAPrepMethod.SCRNA:
+        technique = FilteringTechnique.umi
+        logger.warning(
+            "Single cell filtration does not normalize and assumes "
+            "gene counts are counted with Unique Molecular Identifiers (UMIs). "
+            "Setting filtering technique to UMI now."
+        )
+
+    logger.debug(f"Starting '{context_name}'")
+    output_rnaseq_filepath.parent.mkdir(parents=True, exist_ok=True)
+
+    await _save_rnaseq_tests(
+        context_name=context_name,
+        rnaseq_matrix=await _read_counts(input_rnaseq_filepath),
+        metadata_df=input_metadata_df or await _create_metadata_df(input_metadata_filepath),
+        gene_info_df=pd.read_csv(input_gene_info_filepath),
+        output_filepath=output_rnaseq_filepath,
+        prep=prep,
+        taxon=taxon,
         replicate_ratio=replicate_ratio,
-        replicate_ratio_high=high_replicate_ratio,
         batch_ratio=batch_ratio,
-        batch_ratio_high=high_batch_ratio,
+        high_replicate_ratio=high_replicate_ratio,
+        high_batch_ratio=high_batch_ratio,
         technique=technique,
-        cut_off=cut_off,
-        prep=prep,
-        taxon=taxon_id,
-    )
-
-
-def _parse_args() -> _Arguments:
-    parser = argparse.ArgumentParser(
-        prog="rnaseq_gen.py",
-        description="Generate a list of active and high-confidence genes from a counts matrix using a user defined "
-        "at normalization-technique at /work/data/results/<context name>/rnaseq_<context_name>.csv: "
-        "https://github.com/HelikarLab/FastqToGeneCounts",
-        epilog="For additional help, please post questions/issues in the MADRID GitHub repo at "
-        "https://github.com/HelikarLab/MADRID or email babessell@gmail.com",
-    )
-    parser.add_argument(
-        "-c",
-        "--config-file",
-        type=str,
-        required=True,
-        dest="config_file",
-        help="Name of config .xlsx file in the /work/data/config_files/. Can be generated using "
-        "rnaseq_preprocess.py or manually created and imported into the Juypterlab",
-    )
-    parser.add_argument(
-        "-r",
-        "--replicate-ratio",
-        type=float,
-        required=False,
-        default=0.5,
-        dest="replicate_ratio",
-        help="Ratio of replicates required for a gene to be active within that study/batch group "
-        "Example: 0.7 means that for a gene to be active, at least 70% of replicates in a group "
-        "must pass the cutoff after normalization",
-    )
-    parser.add_argument(
-        "-g",
-        "--batch-ratio",
-        type=float,
-        required=False,
-        default=0.5,
-        dest="batch_ratio",
-        help="Ratio of groups (studies or batches) required for a gene to be active "
-        "Example: 0.7 means that for a gene to be active, at least 70% of groups in a study must  "
-        "have passed the replicate ratio test",
-    )
-    parser.add_argument(
-        "-rh",
-        "--high-replicate-ratio",
-        type=float,
-        required=False,
-        default=1.0,
-        dest="high_replicate_ratio",
-        help="Ratio of replicates required for a gene to be considered high-confidence. "
-        "High-confidence genes ignore consensus with other data-sources, such as proteomics. "
-        "Example: 0.9 means that for a gene to be high-confidence, "
-        "at least 90% of replicates in a group must pass the cutoff after normalization",
-    )
-    parser.add_argument(
-        "-gh",
-        "--high-batch-ratio",
-        type=float,
-        required=False,
-        default=1.0,
-        dest="high_batch_ratio",
-        help="Ratio of studies/batches required for a gene to be considered high-confidence within that group. "
-        "High-confidence genes ignore consensus with other data-sources, like proteomics. "
-        "Example: 0.9 means that for a gene to be high-confidence, "
-        "at least 90% of groups in a study must have passed the replicate ratio test",
-    )
-    parser.add_argument(
-        "--taxon",
-        "--taxon-id",
-        type=str,
-        required=True,
-        dest="taxon",
-        help="The NCBI Taxonomy ID that is being proessed. '9606' for humans, '10090' for mice.",
-    )
-    parser.add_argument(
-        "-t",
-        "--filt-technique",
-        type=str,
-        required=False,
-        default="quantile",
-        dest="filtering_technique",
-        help="Technique to normalize and filter counts with. "
-        "Either 'zfpkm', 'quantile', or 'cpm'. More info about each method is discussed in pipeline.ipynb.",
-    )
-    parser.add_argument(
-        "--minimum-cutoff",
-        type=int,
-        required=False,
-        default=None,
-        dest="minimum_cutoff",
-        help="The minimum cutoff used for the filtration technique. "
-        "If the filtering technique is zFPKM, the default is -3. "
-        "If the filtering technique is quantile-tpm, the default is 25. "
-        "If the filtering technique is flat-cpm, the default is determined dynamically. "
-        "If the filtering technique is quantile, the default is 25.",
-    )
-    parser.add_argument(
-        "-p",
-        "--library-prep",
-        required=True,
-        choices=["total", "mrna", "scrna"],
-        dest="library_prep",
-        help="Library preparation method. "
-        "Will separate samples into groups to only compare similarly prepared libraries. "
-        "For example, mRNA, total-rna, scRNA, etc",
-    )
-    args = parser.parse_args()
-    args.filtering_technique = args.filtering_technique.lower()
-    args.taxon = Taxon.from_int(int(args.taxon)) if str(args.taxon).isdigit() else Taxon.from_string(str(args.taxon))  # type: ignore
-    return _Arguments(**vars(args))
-
-
-if __name__ == "__main__":
-    args = _parse_args()
-    asyncio.run(
-        rnaseq_gen(
-            config_filename=args.config_file,
-            replicate_ratio=args.replicate_ratio,
-            batch_ratio=args.batch_ratio,
-            high_replicate_ratio=args.high_replicate_ratio,
-            high_batch_ratio=args.high_batch_ratio,
-            technique=args.filtering_technique,
-            cut_off=args.minimum_cutoff,
-            prep=args.library_prep,
-            taxon_id=args.taxon,
-        )
+        cut_off=cutoff,
     )
diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py
index ae8bcdea..02702b38 100644
--- a/main/como/rnaseq_preprocess.py
+++ b/main/como/rnaseq_preprocess.py
@@ -138,16 +138,18 @@ def _organize_gene_counts_files(data_dir: Path) -> list[_StudyMetrics]:
     # For each study, collect gene count files, fragment files, insert size files, layouts, and strandedness information
     study_metrics: list[_StudyMetrics] = []
     for gene_dir, strand_dir in zip(gene_counts_directories, strandedness_directories):
-        if gene_dir.stem != strand_dir.stem:
-            raise ValueError(
-                f"Gene directory name of '{gene_dir.stem}' does not match stranded directory name of '{strand_dir.stem}'"  # noqa: E501
-            )
+        count_files = list(gene_dir.glob("*.tab"))
+        strand_files = list(strand_dir.glob("*.txt"))
+        if len(count_files) == 0:
+            raise ValueError(f"No count files found for study '{gene_dir.stem}'.")
+        if len(strand_files) == 0:
+            raise ValueError(f"No strandedness files found for study '{gene_dir.stem}'.")
 
         study_metrics.append(
             _StudyMetrics(
                 study_name=gene_dir.stem,
-                count_files=list(gene_dir.glob("*.tab")),
-                strand_files=list(strand_dir.glob("*.txt")),
+                count_files=count_files,
+                strand_files=strand_files,
             )
         )
     return study_metrics
@@ -262,6 +264,7 @@ async def _write_counts_matrix(
     counts: list[pd.DataFrame] = await asyncio.gather(
         *[_create_sample_counts_matrix(metric) for metric in study_metrics]
     )
+
     final_matrix = pd.DataFrame()
     for count in counts:
         final_matrix = count if final_matrix.empty else pd.merge(final_matrix, count, on="ensembl_gene_id", how="outer")
@@ -275,13 +278,28 @@ async def _write_counts_matrix(
     return final_matrix
 
 
-async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.DataFrame:  # noqa: C901
+async def _create_config_df(  # noqa: C901
+    context_name: str,
+    /,
+    como_context_dir: Path,
+    gene_count_dirname: str = "geneCounts",
+    layout_dirname: str = "layouts",
+    strandedness_dirname: str = "strandedness",
+    fragment_sizes_dirname: str = "fragmentSizes",
+    prep_method_dirname: str = "prepMethods",
+) -> pd.DataFrame:
     """Create configuration sheet.
 
     The configuration file created is based on the gene counts matrix.
      If using zFPKM normalization technique, mean fragment lengths will be fetched
     """
-    gene_counts_files = list(Path(como_input_dir, context_name, "geneCounts").rglob("*.tab"))
+    gene_counts_dir = como_context_dir / gene_count_dirname
+    layout_dir = como_context_dir / layout_dirname
+    strandedness_dir = como_context_dir / strandedness_dirname
+    fragment_sizes_dir = como_context_dir / fragment_sizes_dirname
+    prep_method_dir = como_context_dir / prep_method_dirname
+
+    gene_counts_files = list(gene_counts_dir.rglob("*.tab"))
     sample_names: list[str] = []
     fragment_lengths: list[int | float] = []
     layouts: list[str] = []
@@ -289,52 +307,51 @@ async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.Da
     groups: list[str] = []
     preparation_method: list[str] = []
 
+    if len(gene_counts_files) == 0:
+        raise FileNotFoundError(f"No gene count files found in '{gene_counts_dir}'.")
+
     for gene_count_filename in sorted(gene_counts_files):
-        try:
-            # Match S___R___r___
-            # \d{1,3} matches 1-3 digits
-            # (?:r\d{1,3})? matches an option "r" followed by three digits
-            label = re.findall(r"S\d{1,3}R\d{1,3}(?:r\d{1,3})?", gene_count_filename.as_posix())[0]
-
-        except IndexError as e:
-            raise IndexError(
+        # Match S___R___r___
+        # \d{1,3} matches 1-3 digits
+        # (?:r\d{1,3})? optionally matches a "r" followed by three digits
+        label = re.findall(r"S\d{1,3}R\d{1,3}(?:r\d{1,3})?", gene_count_filename.as_posix())[0]
+        if not label:
+            raise ValueError(
                 f"\n\nFilename of '{gene_count_filename}' is not valid. "
                 f"Should be 'contextName_SXRYrZ.tab', where X is the study/batch number, Y is the replicate number, "
                 f"and Z is the run number."
                 "\n\nIf not a multi-run sample, exclude 'rZ' from the filename."
-            ) from e
+            )
 
         study_number = re.findall(r"S\d{1,3}", label)[0]
         rep_number = re.findall(r"R\d{1,3}", label)[0]
-        run = re.findall(r"r\d{1,3}", label)
+        run_number = re.findall(r"r\d{1,3}", label)
 
         multi_flag = 0
-        if len(run) > 0:
-            if run[0] != "r1":
+        if len(run_number) > 0:
+            if run_number[0] != "r1":
                 continue
-            else:
-                label_glob = study_number + rep_number + "r*"
-                runs = [run for run in gene_counts_files if re.search(label_glob, run.as_posix())]
-                multi_flag = 1
-                frag_files = []
-
-                for r in runs:
-                    r_label = re.findall(r"r\d{1,3}", r.as_posix())[0]
-                    R_label = re.findall(r"R\d{1,3}", r.as_posix())[0]  # noqa: N806
-                    frag_filename = "".join([context_name, "_", study_number, R_label, r_label, "_fragment_size.txt"])
-                    frag_files.append(como_input_dir / context_name / "fragmentSizes" / study_number / frag_filename)
-
-        context_path = como_input_dir / context_name
-        layout_files: list[Path] = list((context_path / "layouts").rglob(f"{context_name}_{label}_layout.txt"))
-        strand_files: list[Path] = list((context_path / "strandedness").rglob(f"{context_name}_{label}_strandedness.txt"))  # fmt: skip  # noqa: E501
-        frag_files: list[Path] = list((context_path / "fragmentSizes").rglob(f"{context_name}_{label}_fragment_size.txt"))  # fmt: skip  # noqa: E501
-        prep_files: list[Path] = list((context_path / "prepMethods").rglob(f"{context_name}_{label}_prep_method.txt"))
+            label_glob = f"{study_number}{rep_number}r*"  # S__R__r*
+            runs = [run for run in gene_counts_files if re.search(label_glob, run.as_posix())]
+            multi_flag = 1
+            frag_files = []
+
+            for run in runs:
+                run_number = re.findall(r"R\d{1,3}", run.as_posix())[0]
+                replicate = re.findall(r"r\d{1,3}", run.as_posix())[0]
+                frag_filename = "".join([context_name, "_", study_number, run_number, replicate, "_fragment_size.txt"])
+                frag_files.append(como_context_dir / fragment_sizes_dirname / study_number / frag_filename)
+
+        layout_files: list[Path] = list(layout_dir.rglob(f"{context_name}_{label}_layout.txt"))
+        strand_files: list[Path] = list(strandedness_dir.rglob(f"{context_name}_{label}_strandedness.txt"))
+        frag_files: list[Path] = list(fragment_sizes_dir.rglob(f"{context_name}_{label}_fragment_size.txt"))
+        prep_files: list[Path] = list(prep_method_dir.rglob(f"{context_name}_{label}_prep_method.txt"))
 
         layout = "UNKNOWN"
         if len(layout_files) == 0:
             logger.warning(
                 f"No layout file found for {label}, writing as 'UNKNOWN', "
-                f"this should be defined by user if using zFPKM or rnaseq_gen.py will not run"
+                f"this should be defined if you are using zFPKM or downstream 'rnaseq_gen.py' will not run"
             )
         elif len(layout_files) == 1:
             with layout_files[0].open("r") as file:
@@ -363,7 +380,7 @@ async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.Da
 
         prep = "total"
         if len(prep_files) == 0:
-            logger.warning(f"No prep file found for {label}, assuming 'total' as in Total RNA library preparation")
+            logger.warning(f"No prep file found for {label}, assuming 'total', as in 'Total RNA' library preparation")
         elif len(prep_files) == 1:
             with prep_files[0].open("r") as file:
                 prep = file.read().strip().lower()
@@ -376,10 +393,10 @@ async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.Da
             )
 
         mean_fragment_size = 100
-        if len(frag_files) == 0:
+        if len(frag_files) == 0 and prep != RNAPrepMethod.TOTAL.value:
             logger.warning(
                 f"No fragment file found for {label}, using '100'. "
-                f"This must be defined by the user in order to use zFPKM normalization"
+                "You should define this if you are going to use downstream zFPKM normalization"
             )
         elif len(frag_files) == 1:
             if layout == "single-end":
@@ -448,11 +465,11 @@ async def read_counts(file: Path) -> list[str]:
         )
         return conversion["entrez_gene_id"].tolist()
 
-    logger.info("Fetching gene info (this may take 1-5 minutes)")
+    logger.info(
+        "Fetching gene info (this may take 1-5 minutes depending on the number of genes and your internet connection)"
+    )
     genes = set(chain.from_iterable(await asyncio.gather(*[read_counts(f) for f in counts_matrix_filepaths])))
-
-    mygene = MyGene(cache=cache)
-    gene_data = await mygene.query(items=list(genes), taxon=taxon, scopes="entrezgene")
+    gene_data = await MyGene(cache=cache).query(items=list(genes), taxon=taxon, scopes="entrezgene")
     gene_info: pd.DataFrame = pd.DataFrame(
         data=None,
         columns=pd.Index(data=["ensembl_gene_id", "gene_symbol", "entrez_gene_id", "start_position", "end_position"]),
@@ -495,7 +512,7 @@ async def _create_matrix_file(
     output_counts_matrix_filepath: Path,
     rna: type_rna,
 ) -> None:
-    config_df = await _create_config_df(context_name, como_input_dir=como_context_dir)
+    config_df = await _create_config_df(context_name, como_context_dir=como_context_dir)
     await _write_counts_matrix(
         config_df=config_df,
         como_context_dir=como_context_dir,
@@ -523,7 +540,7 @@ async def _process(
     if output_trna_config_filepath:
         rna_types.append(("total", output_trna_config_filepath, output_trna_matrix_filepath))
     if output_mrna_config_filepath:
-        rna_types.append(("polya", output_mrna_config_filepath, output_mrna_matrix_filepath))
+        rna_types.append(("mrna", output_mrna_config_filepath, output_mrna_matrix_filepath))
 
     # if provided, iterate through como-input specific directories
     tasks = []
@@ -563,9 +580,9 @@ async def rnaseq_preprocess(
     input_matrix_filepath: Path | list[Path] | None = None,
     preparation_method: RNAPrepMethod | list[RNAPrepMethod] | None = None,
     output_trna_config_filepath: Path | None = None,
-    output_polya_config_filepath: Path | None = None,
+    output_mrna_config_filepath: Path | None = None,
     output_trna_count_matrix_filepath: Path | None = None,
-    output_polya_count_matrix_filepath: Path | None = None,
+    output_mrna_count_matrix_filepath: Path | None = None,
     cache: bool = True,
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO",
     log_location: str | TextIOWrapper = sys.stderr,
@@ -579,9 +596,9 @@ async def rnaseq_preprocess(
     :param taxon: The NCBI taxonomy ID
     :param output_gene_info_filepath: Path to the output gene information CSV file
     :param output_trna_config_filepath: Path to the output tRNA config file (if in "create" mode)
-    :param output_polya_config_filepath: Path to the output mRNA config file (if in "create" mode)
+    :param output_mrna_config_filepath: Path to the output mRNA config file (if in "create" mode)
     :param output_trna_count_matrix_filepath: The path to write total RNA count matrices
-    :param output_polya_count_matrix_filepath: The path to write messenger RNA count matrices
+    :param output_mrna_count_matrix_filepath: The path to write messenger RNA count matrices
     :param como_context_dir: If in "create" mode, the input path(s) to the COMO_input directory of the current context
         i.e., the directory containing "fragmentSizes", "geneCounts", "insertSizeMetrics", etc. directories
     :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed
@@ -604,18 +621,18 @@ async def rnaseq_preprocess(
     output_trna_config_filepath = (
         output_trna_config_filepath.resolve() if output_trna_config_filepath else output_trna_config_filepath
     )
-    output_polya_config_filepath = (
-        output_polya_config_filepath.resolve() if output_polya_config_filepath else output_polya_config_filepath
+    output_mrna_config_filepath = (
+        output_mrna_config_filepath.resolve() if output_mrna_config_filepath else output_mrna_config_filepath
     )
     output_trna_count_matrix_filepath = (
         output_trna_count_matrix_filepath.resolve()
         if output_trna_count_matrix_filepath
         else output_trna_count_matrix_filepath
     )
-    output_polya_count_matrix_filepath = (
-        output_polya_count_matrix_filepath.resolve()
-        if output_polya_count_matrix_filepath
-        else output_polya_count_matrix_filepath
+    output_mrna_count_matrix_filepath = (
+        output_mrna_count_matrix_filepath.resolve()
+        if output_mrna_count_matrix_filepath
+        else output_mrna_count_matrix_filepath
     )
 
     input_matrix_filepath = _listify(input_matrix_filepath)
@@ -633,8 +650,8 @@ async def rnaseq_preprocess(
         input_matrix_filepath=input_matrix_filepath,
         output_gene_info_filepath=output_gene_info_filepath,
         output_trna_config_filepath=output_trna_config_filepath,
-        output_mrna_config_filepath=output_polya_config_filepath,
+        output_mrna_config_filepath=output_mrna_config_filepath,
         output_trna_matrix_filepath=output_trna_count_matrix_filepath,
-        output_mrna_matrix_filepath=output_polya_count_matrix_filepath,
+        output_mrna_matrix_filepath=output_mrna_count_matrix_filepath,
         cache=cache,
     )
diff --git a/main/como/types.py b/main/como/types.py
index ebe44ecf..916d3a25 100644
--- a/main/como/types.py
+++ b/main/como/types.py
@@ -46,5 +46,30 @@ def from_string(value: str) -> RNAPrepMethod:
                 raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}")
 
 
+class FilteringTechnique(Enum):
+    """RNA sequencing filtering capabilities."""
+
+    cpm = "cpm"
+    zfpkm = "zfpkm"
+    tpm = "quantile"
+    umi = "umi"
+
+    @staticmethod
+    def from_string(value: str) -> FilteringTechnique:
+        """Create a filtering technique object from a string."""
+        match value.lower():
+            case "cpm":
+                return FilteringTechnique.cpm
+            case "zfpkm":
+                return FilteringTechnique.zfpkm
+            case "quantile":
+                return FilteringTechnique.tpm
+            case "umi":
+                return FilteringTechnique.umi
+            case _:
+                possible_values = [t.value for t in FilteringTechnique]
+                raise ValueError(f"Got a filtering technique of '{value}'; should be one of: {possible_values}")
+
+
 type_path = str | Path
-type_rna = Literal["total", "polya"]
+type_rna = Literal["total", "mrna"]
diff --git a/main/data/boundary_rxns/naiveB_boundary_rxns.csv b/main/data/boundary_rxns/naiveB_boundary_rxns.csv
index 7130c9be..0b08556e 100644
--- a/main/data/boundary_rxns/naiveB_boundary_rxns.csv
+++ b/main/data/boundary_rxns/naiveB_boundary_rxns.csv
@@ -1,4 +1,4 @@
-Reaction,Abbreviation,Compartment,Minimum Reaction Rate,Maximum Reaction Rate
+Boundary,Abbreviation,Compartment,Minimum Reaction Rate,Maximum Reaction Rate
 Exchange,glc_D,Extracellular,-100,1000
 Exchange,fe2,Extracellular,-1000,1000
 Exchange,gal,Extracellular,0,1000
diff --git a/main/data/boundary_rxns/smB_boundary_rxns.csv b/main/data/boundary_rxns/smB_boundary_rxns.csv
index 9f0f9385..0b08556e 100644
--- a/main/data/boundary_rxns/smB_boundary_rxns.csv
+++ b/main/data/boundary_rxns/smB_boundary_rxns.csv
@@ -1,4 +1,4 @@
-Reaction,Abbreviation,Compartment,Minimum Reaction Rate,Maximum Reaction Rate
+Boundary,Abbreviation,Compartment,Minimum Reaction Rate,Maximum Reaction Rate
 Exchange,glc_D,Extracellular,-100,1000
 Exchange,fe2,Extracellular,-1000,1000
 Exchange,gal,Extracellular,0,1000
@@ -67,4 +67,4 @@ Exchange,orn,Extracellular,-1,1000
 Exchange,orn_D,Extracellular,-1,1000
 Exchange,so4,Extracellular,-1000,1000
 Exchange,ribflv,Extracellular,-1,1000
-Exchange,Lcystin,Extracellular,-1,1000
+Exchange,Lcystin,Extracellular,-1,1000
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 85a1308f..fabd6c69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires-python = ">=3.10,<3.13"
 dependencies = [
     "aiofiles>=24.1.0",
     "aioftp>=0.23.1",
-    "cobamp",
+    "cobamp@git+https://github.com/JoshLoecker/cobamp@master",
     "cobra>=0.28.0",
     "fast-bioservices>=0.3.9",
     "gurobipy>=11.0",
@@ -20,7 +20,7 @@ dependencies = [
     "scikit-learn>=1.5.2",
     "scipy>=1.7.3",
     "setuptools<60.0",
-    "troppo",
+    "troppo@git+https://github.com/JoshLoecker/troppo@master",
 ]
 
 [project.optional-dependencies]
@@ -57,14 +57,9 @@ dev-dependencies = [
     "cz-conventional-gitmoji>=0.6.1",
 ]
 
-[tool.uv.sources]
-troppo = { git = "https://github.com/JoshLoecker/troppo", rev = "update_dependencies" }
-cobamp = { git = "https://github.com/JoshLoecker/cobamp", rev = "update_packages" }
-pydantic-settings = { git = "https://github.com/pydantic/pydantic-settings" }
-
 [tool.commitizen]
 name = "cz_conventional_commits"
 tag_format = "$version"
 version_scheme = "semver2"
 version_provider = "pep621"
-update_changelog_on_bump = true
+update_changelog_on_bump = true
\ No newline at end of file