Change folder architecture

b355fbd8 · Almouhannad Hafez · 4a19d3ea
Commit b355fbd8 authored Nov 22, 2024 by Almouhannad Hafez
11 changed files
--- a/1.1.Dataset_Overview.ipynb
+++ b/1.1.Dataset_Overview.ipynb
--- a/1/1.1.Dataset_Overview.ipynb
+++ b/1/1.1.Dataset_Overview.ipynb
--- a/1.2.Data_Preprocessing.ipynb
+++ b/1.2.Data_Preprocessing.ipynb
@@ -17,9 +17,7 @@
    "from sklearn.utils import shuffle\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
-    "import contractions\n",
+    "import contractions"
-    "\n",
-    "from constants import CONSTANTS"
   ]
  },
  {
@@ -27,6 +25,21 @@
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "df = pd.read_csv(CONSTANTS.DATASET_PATH)"
   ]
@@ -40,7 +53,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -49,7 +62,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@@ -122,7 +135,7 @@
       "4           4  Psoriasis  My nails have small dents or pits in them, and..."
      ]
     },
-     "execution_count": 4,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -140,7 +153,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
@@ -207,7 +220,7 @@
       "4  My nails have small dents or pits in them, and...  Psoriasis"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -228,7 +241,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@@ -258,7 +271,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@@ -292,7 +305,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -359,7 +372,7 @@
       "4  I've been quite exhausted and ill. My throat h...            Common Cold"
      ]
     },
-     "execution_count": 8,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -379,7 +392,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@@ -440,7 +453,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
@@ -507,7 +520,7 @@
       "4  i've been quite exhausted and ill. my throat h...            common cold"
      ]
     },
-     "execution_count": 10,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -528,7 +541,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@@ -595,7 +608,7 @@
       "4  i have been quite exhausted and ill. my throat...            common cold"
      ]
     },
-     "execution_count": 11,
+     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -614,7 +627,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@@ -634,7 +647,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [

--- a/2.Stemmer.ipynb
+++ b/2.Stemmer.ipynb
@@ -28,9 +28,22 @@
    "\n",
    "from tabulate import tabulate\n",
    "\n",
-    "import pandas as pd\n",
+    "import pandas as pd"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {
@@ -42,7 +55,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -56,7 +69,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -70,7 +83,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -98,7 +111,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -109,7 +122,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -127,7 +140,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -144,7 +157,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -170,7 +183,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@@ -241,7 +254,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -261,7 +274,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -318,7 +331,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@@ -349,7 +362,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
@@ -377,7 +390,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@@ -793,7 +806,7 @@
       "MultinomialNB(alpha=0.1)"
      ]
     },
-     "execution_count": 14,
+     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -806,7 +819,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@@ -844,7 +857,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {

--- a/3.1.Lemmatizer.ipynb
+++ b/3.1.Lemmatizer.ipynb
@@ -42,9 +42,22 @@
    "\n",
    "from tabulate import tabulate\n",
    "\n",
-    "import pandas as pd\n",
+    "import pandas as pd"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/3.2.POS_Tagging_Filter.ipynb
+++ b/3.2.POS_Tagging_Filter.ipynb
@@ -30,9 +30,22 @@
    "\n",
    "from tabulate import tabulate\n",
    "\n",
-    "import pandas as pd\n",
+    "import pandas as pd"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/3.3.N-Grams.ipynb
+++ b/3.3.N-Grams.ipynb
@@ -25,9 +25,22 @@
    "\n",
    "from tabulate import tabulate\n",
    "\n",
-    "import pandas as pd\n",
+    "import pandas as pd"
-    "\n",
+   ]
-    "from constants import CONSTANTS\n"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/4.Data_augmentation.ipynb
+++ b/4.Data_augmentation.ipynb
@@ -17,9 +17,22 @@
    "\n",
    "from ollama import Client\n",
    "\n",
-    "import re\n",
+    "import re"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/5.0.Process_texts_stanza.ipynb
+++ b/5.0.Process_texts_stanza.ipynb
@@ -26,9 +26,22 @@
    "\n",
    "import stanza\n",
    "\n",
-    "import pickle # Module to store python objects\n",
+    "import pickle # Module to store python objects"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/5.1.Dep_parsing_classifier.ipynb
+++ b/5.1.Dep_parsing_classifier.ipynb
@@ -27,9 +27,22 @@
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "\n",
-    "from tabulate import tabulate\n",
+    "from tabulate import tabulate"
-    "\n",
+   ]
-    "from constants import CONSTANTS"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "parent_dir = os.path.abspath('..')\n",
+    "sys.path.append(parent_dir)\n",
+    "from constants import CONSTANTS\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {

--- a/constants.py
+++ b/constants.py
 class CONSTANTS:
-    DATASET_PATH = 'data/Symptom2Disease.csv'
+    DATASET_PATH = '../data/Symptom2Disease.csv'
-    TRAIN_SET_PATH = 'data/Preprocessed_Symptom2Disease_Train.csv'
+    TRAIN_SET_PATH = '../data/Preprocessed_Symptom2Disease_Train.csv'
-    TEST_SET_PATH = 'data/Preprocessed_Symptom2Disease_Test.csv'
+    TEST_SET_PATH = '../data/Preprocessed_Symptom2Disease_Test.csv'
-    AUGMENTED_DATASET_PATH = 'data/augmented_Symptom2Disease.csv'
+    AUGMENTED_DATASET_PATH = '../data/augmented_Symptom2Disease.csv'
-    AUGMENTED_TRAIN_SET_PATH = 'data/augmented_Preprocessed_Symptom2Disease_Train.csv'
+    AUGMENTED_TRAIN_SET_PATH = '../data/augmented_Preprocessed_Symptom2Disease_Train.csv'
-    AUGMENTED_TEST_SET_PATH = 'data/augmented_Preprocessed_Symptom2Disease_Test.csv'
+    AUGMENTED_TEST_SET_PATH = '../data/augmented_Preprocessed_Symptom2Disease_Test.csv'
-    LEMMATIZED_TEXTS_OBJECT_PATH = 'stanza/lemmatized_texts.pkl'
+    LEMMATIZED_TEXTS_OBJECT_PATH = '../stanza/lemmatized_texts.pkl'
-    DEP_PARSED_TEXTS_OBJECT_PATH = 'stanza/dep_parsed_texts.pkl'
+    DEP_PARSED_TEXTS_OBJECT_PATH = '../stanza/dep_parsed_texts.pkl'
\ No newline at end of file