Commit 6f099ed0 authored by Almouhannad's avatar Almouhannad

Use helpers

parent ac58b5ff
__pycache__/constants.cpython-311.pyc __pycache__/constants.cpython-311.pyc
data/bread_basket_preprocessed.csv data/bread_basket_preprocessed.csv
__pycache__/helpers.cpython-311.pyc
...@@ -12,4 +12,6 @@ ...@@ -12,4 +12,6 @@
> - **FP Growth** > - **FP Growth**
> 1. **Performance comparison between the two algorithms** > 1. **Performance comparison between the two algorithms**
> ***This project contains a python file `constants.py` containing some fixed values used in `hw.ipynb`, referred to as the `CONSTANTS` class*** > ***This project contains a python file `constants.py` containing some fixed values used in `hw1.ipynb`, referred to as the `CONSTANTS` class***
> ***This project contains a python file `helpers.py` containing some helper functions used in `hw1.ipynb`, referred to as the `HELPERS` class***
\ No newline at end of file
import pandas as pd
class HELPERS:
    """Static helper utilities shared by the notebooks (referred to as the HELPERS class)."""

    @staticmethod
    def read_dataset_from_csv(path):
        """Read a CSV dataset into a pandas DataFrame.

        Parameters
        ----------
        path : str or os.PathLike
            Location of the CSV file to load.

        Returns
        -------
        pandas.DataFrame or None
            The loaded dataset on success; ``None`` when reading fails
            (an error message is printed instead of raising, so callers
            must check for ``None`` before using the result).
        """
        try:
            data = pd.read_csv(path)
            return data
        except FileNotFoundError:
            print(f"Error: The file at {path} was not found.")
        except pd.errors.EmptyDataError:
            print(f"Error: The file at {path} is empty.")
        except pd.errors.ParserError:
            print(f"Error: The file at {path} could not be parsed.")
        except Exception as e:
            # Deliberate catch-all: keeps notebook execution alive; the
            # caller's shape assertion will surface the failure.
            print(f"An unexpected error occurred: {e}")
        # Explicit: every error path yields None (was an implicit fall-through).
        return None
\ No newline at end of file
...@@ -59,6 +59,7 @@ ...@@ -59,6 +59,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from helpers import HELPERS\n",
"from constants import CONSTANTS\n", "from constants import CONSTANTS\n",
"# Some more magic so that the notebook will reload external python modules;\n", "# Some more magic so that the notebook will reload external python modules;\n",
"# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n", "# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n",
...@@ -95,8 +96,8 @@ ...@@ -95,8 +96,8 @@
} }
], ],
"source": [ "source": [
"df = pd.read_csv(CONSTANTS.DATASET_PATH)\n", "df = None\n",
"df.shape\n", "df = HELPERS.read_dataset_from_csv(CONSTANTS.DATASET_PATH)\n",
"assert df.shape == CONSTANTS.DATASET_SHAPE, f\"Expected shape {CONSTANTS.DATASET_SHAPE}, but got {df.shape}\" \n", "assert df.shape == CONSTANTS.DATASET_SHAPE, f\"Expected shape {CONSTANTS.DATASET_SHAPE}, but got {df.shape}\" \n",
"print(\"Dataset loaded successfully with shape:\", df.shape)" "print(\"Dataset loaded successfully with shape:\", df.shape)"
] ]
...@@ -469,9 +470,7 @@ ...@@ -469,9 +470,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.to_csv(CONSTANTS.PREPROCESSED_DATASET_PATH, index=False)\n", "df.to_csv(CONSTANTS.PREPROCESSED_DATASET_PATH, index=False)"
"# Free memory\n",
"df = None"
] ]
}, },
{ {
...@@ -497,15 +496,15 @@ ...@@ -497,15 +496,15 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset loaded successfully\n" "Dataset loaded successfully with shape: (9465, 94)\n"
] ]
} }
], ],
"source": [ "source": [
"df = None\n", "df = None\n",
"df = pd.read_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n", "df = HELPERS.read_dataset_from_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n",
"assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n", "assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n",
"print(f\"Dataset loaded successfully\")" "print(\"Dataset loaded successfully with shape:\", df.shape)"
] ]
}, },
{ {
...@@ -602,17 +601,17 @@ ...@@ -602,17 +601,17 @@
" <tr>\n", " <tr>\n",
" <th>6</th>\n", " <th>6</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Bread)</td>\n", " <td>(Bread, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>7</th>\n", " <th>7</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Coffee)</td>\n", " <td>(Coffee, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>8</th>\n", " <th>8</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Medialuna)</td>\n", " <td>(Medialuna, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -626,9 +625,9 @@ ...@@ -626,9 +625,9 @@
"3 0.200000 (Muffin)\n", "3 0.200000 (Muffin)\n",
"4 0.400000 (Pastry)\n", "4 0.400000 (Pastry)\n",
"5 0.200000 (Scandinavian)\n", "5 0.200000 (Scandinavian)\n",
"6 0.200000 (Pastry, Bread)\n", "6 0.200000 (Bread, Pastry)\n",
"7 0.200000 (Pastry, Coffee)\n", "7 0.200000 (Coffee, Pastry)\n",
"8 0.200000 (Pastry, Medialuna)" "8 0.200000 (Medialuna, Pastry)"
] ]
}, },
"execution_count": 17, "execution_count": 17,
...@@ -711,55 +710,55 @@ ...@@ -711,55 +710,55 @@
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>(Pastry)</td>\n",
" <td>(Coffee)</td>\n", " <td>(Coffee)</td>\n",
" <td>0.400000</td>\n", " <td>(Pastry)</td>\n",
" <td>0.266667</td>\n", " <td>0.266667</td>\n",
" <td>0.400000</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.50</td>\n", " <td>0.75</td>\n",
" <td>1.875000</td>\n", " <td>1.875000</td>\n",
" <td>0.093333</td>\n", " <td>0.093333</td>\n",
" <td>1.466667</td>\n", " <td>2.400000</td>\n",
" <td>0.777778</td>\n", " <td>0.636364</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>(Coffee)</td>\n",
" <td>(Pastry)</td>\n", " <td>(Pastry)</td>\n",
" <td>0.266667</td>\n", " <td>(Coffee)</td>\n",
" <td>0.400000</td>\n", " <td>0.400000</td>\n",
" <td>0.266667</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.75</td>\n", " <td>0.50</td>\n",
" <td>1.875000</td>\n", " <td>1.875000</td>\n",
" <td>0.093333</td>\n", " <td>0.093333</td>\n",
" <td>2.400000</td>\n", " <td>1.466667</td>\n",
" <td>0.636364</td>\n", " <td>0.777778</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>(Pastry)</td>\n",
" <td>(Medialuna)</td>\n", " <td>(Medialuna)</td>\n",
" <td>0.400000</td>\n", " <td>(Pastry)</td>\n",
" <td>0.333333</td>\n", " <td>0.333333</td>\n",
" <td>0.400000</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.50</td>\n", " <td>0.60</td>\n",
" <td>1.500000</td>\n", " <td>1.500000</td>\n",
" <td>0.066667</td>\n", " <td>0.066667</td>\n",
" <td>1.333333</td>\n", " <td>1.500000</td>\n",
" <td>0.555556</td>\n", " <td>0.500000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>(Medialuna)</td>\n",
" <td>(Pastry)</td>\n", " <td>(Pastry)</td>\n",
" <td>0.333333</td>\n", " <td>(Medialuna)</td>\n",
" <td>0.400000</td>\n", " <td>0.400000</td>\n",
" <td>0.333333</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.60</td>\n", " <td>0.50</td>\n",
" <td>1.500000</td>\n", " <td>1.500000</td>\n",
" <td>0.066667</td>\n", " <td>0.066667</td>\n",
" <td>1.500000</td>\n", " <td>1.333333</td>\n",
" <td>0.500000</td>\n", " <td>0.555556</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -768,17 +767,17 @@ ...@@ -768,17 +767,17 @@
"text/plain": [ "text/plain": [
" antecedents consequents antecedent support consequent support support \\\n", " antecedents consequents antecedent support consequent support support \\\n",
"0 (Pastry) (Bread) 0.400000 0.466667 0.2 \n", "0 (Pastry) (Bread) 0.400000 0.466667 0.2 \n",
"1 (Pastry) (Coffee) 0.400000 0.266667 0.2 \n", "1 (Coffee) (Pastry) 0.266667 0.400000 0.2 \n",
"2 (Coffee) (Pastry) 0.266667 0.400000 0.2 \n", "2 (Pastry) (Coffee) 0.400000 0.266667 0.2 \n",
"3 (Pastry) (Medialuna) 0.400000 0.333333 0.2 \n", "3 (Medialuna) (Pastry) 0.333333 0.400000 0.2 \n",
"4 (Medialuna) (Pastry) 0.333333 0.400000 0.2 \n", "4 (Pastry) (Medialuna) 0.400000 0.333333 0.2 \n",
"\n", "\n",
" confidence lift leverage conviction zhangs_metric \n", " confidence lift leverage conviction zhangs_metric \n",
"0 0.50 1.071429 0.013333 1.066667 0.111111 \n", "0 0.50 1.071429 0.013333 1.066667 0.111111 \n",
"1 0.50 1.875000 0.093333 1.466667 0.777778 \n", "1 0.75 1.875000 0.093333 2.400000 0.636364 \n",
"2 0.75 1.875000 0.093333 2.400000 0.636364 \n", "2 0.50 1.875000 0.093333 1.466667 0.777778 \n",
"3 0.50 1.500000 0.066667 1.333333 0.555556 \n", "3 0.60 1.500000 0.066667 1.500000 0.500000 \n",
"4 0.60 1.500000 0.066667 1.500000 0.500000 " "4 0.50 1.500000 0.066667 1.333333 0.555556 "
] ]
}, },
"execution_count": 18, "execution_count": 18,
...@@ -816,15 +815,15 @@ ...@@ -816,15 +815,15 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset loaded successfully\n" "Dataset loaded successfully with shape: (9465, 94)\n"
] ]
} }
], ],
"source": [ "source": [
"df = None\n", "df = None\n",
"df = pd.read_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n", "df = HELPERS.read_dataset_from_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n",
"assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n", "assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n",
"print(f\"Dataset loaded successfully\")" "print(\"Dataset loaded successfully with shape:\", df.shape)"
] ]
}, },
{ {
...@@ -921,17 +920,17 @@ ...@@ -921,17 +920,17 @@
" <tr>\n", " <tr>\n",
" <th>6</th>\n", " <th>6</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Bread)</td>\n", " <td>(Bread, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>7</th>\n", " <th>7</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Coffee)</td>\n", " <td>(Coffee, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>8</th>\n", " <th>8</th>\n",
" <td>0.200000</td>\n", " <td>0.200000</td>\n",
" <td>(Pastry, Medialuna)</td>\n", " <td>(Medialuna, Pastry)</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -945,9 +944,9 @@ ...@@ -945,9 +944,9 @@
"3 0.400000 (Pastry)\n", "3 0.400000 (Pastry)\n",
"4 0.266667 (Coffee)\n", "4 0.266667 (Coffee)\n",
"5 0.333333 (Medialuna)\n", "5 0.333333 (Medialuna)\n",
"6 0.200000 (Pastry, Bread)\n", "6 0.200000 (Bread, Pastry)\n",
"7 0.200000 (Pastry, Coffee)\n", "7 0.200000 (Coffee, Pastry)\n",
"8 0.200000 (Pastry, Medialuna)" "8 0.200000 (Medialuna, Pastry)"
] ]
}, },
"execution_count": 21, "execution_count": 21,
...@@ -1030,55 +1029,55 @@ ...@@ -1030,55 +1029,55 @@
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>(Pastry)</td>\n",
" <td>(Coffee)</td>\n", " <td>(Coffee)</td>\n",
" <td>0.400000</td>\n", " <td>(Pastry)</td>\n",
" <td>0.266667</td>\n", " <td>0.266667</td>\n",
" <td>0.400000</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.50</td>\n", " <td>0.75</td>\n",
" <td>1.875000</td>\n", " <td>1.875000</td>\n",
" <td>0.093333</td>\n", " <td>0.093333</td>\n",
" <td>1.466667</td>\n", " <td>2.400000</td>\n",
" <td>0.777778</td>\n", " <td>0.636364</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>(Coffee)</td>\n",
" <td>(Pastry)</td>\n", " <td>(Pastry)</td>\n",
" <td>0.266667</td>\n", " <td>(Coffee)</td>\n",
" <td>0.400000</td>\n", " <td>0.400000</td>\n",
" <td>0.266667</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.75</td>\n", " <td>0.50</td>\n",
" <td>1.875000</td>\n", " <td>1.875000</td>\n",
" <td>0.093333</td>\n", " <td>0.093333</td>\n",
" <td>2.400000</td>\n", " <td>1.466667</td>\n",
" <td>0.636364</td>\n", " <td>0.777778</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>(Pastry)</td>\n",
" <td>(Medialuna)</td>\n", " <td>(Medialuna)</td>\n",
" <td>0.400000</td>\n", " <td>(Pastry)</td>\n",
" <td>0.333333</td>\n", " <td>0.333333</td>\n",
" <td>0.400000</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.50</td>\n", " <td>0.60</td>\n",
" <td>1.500000</td>\n", " <td>1.500000</td>\n",
" <td>0.066667</td>\n", " <td>0.066667</td>\n",
" <td>1.333333</td>\n", " <td>1.500000</td>\n",
" <td>0.555556</td>\n", " <td>0.500000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>(Medialuna)</td>\n",
" <td>(Pastry)</td>\n", " <td>(Pastry)</td>\n",
" <td>0.333333</td>\n", " <td>(Medialuna)</td>\n",
" <td>0.400000</td>\n", " <td>0.400000</td>\n",
" <td>0.333333</td>\n",
" <td>0.2</td>\n", " <td>0.2</td>\n",
" <td>0.60</td>\n", " <td>0.50</td>\n",
" <td>1.500000</td>\n", " <td>1.500000</td>\n",
" <td>0.066667</td>\n", " <td>0.066667</td>\n",
" <td>1.500000</td>\n", " <td>1.333333</td>\n",
" <td>0.500000</td>\n", " <td>0.555556</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
...@@ -1087,17 +1086,17 @@ ...@@ -1087,17 +1086,17 @@
"text/plain": [ "text/plain": [
" antecedents consequents antecedent support consequent support support \\\n", " antecedents consequents antecedent support consequent support support \\\n",
"0 (Pastry) (Bread) 0.400000 0.466667 0.2 \n", "0 (Pastry) (Bread) 0.400000 0.466667 0.2 \n",
"1 (Pastry) (Coffee) 0.400000 0.266667 0.2 \n", "1 (Coffee) (Pastry) 0.266667 0.400000 0.2 \n",
"2 (Coffee) (Pastry) 0.266667 0.400000 0.2 \n", "2 (Pastry) (Coffee) 0.400000 0.266667 0.2 \n",
"3 (Pastry) (Medialuna) 0.400000 0.333333 0.2 \n", "3 (Medialuna) (Pastry) 0.333333 0.400000 0.2 \n",
"4 (Medialuna) (Pastry) 0.333333 0.400000 0.2 \n", "4 (Pastry) (Medialuna) 0.400000 0.333333 0.2 \n",
"\n", "\n",
" confidence lift leverage conviction zhangs_metric \n", " confidence lift leverage conviction zhangs_metric \n",
"0 0.50 1.071429 0.013333 1.066667 0.111111 \n", "0 0.50 1.071429 0.013333 1.066667 0.111111 \n",
"1 0.50 1.875000 0.093333 1.466667 0.777778 \n", "1 0.75 1.875000 0.093333 2.400000 0.636364 \n",
"2 0.75 1.875000 0.093333 2.400000 0.636364 \n", "2 0.50 1.875000 0.093333 1.466667 0.777778 \n",
"3 0.50 1.500000 0.066667 1.333333 0.555556 \n", "3 0.60 1.500000 0.066667 1.500000 0.500000 \n",
"4 0.60 1.500000 0.066667 1.500000 0.500000 " "4 0.50 1.500000 0.066667 1.333333 0.555556 "
] ]
}, },
"execution_count": 22, "execution_count": 22,
...@@ -1135,15 +1134,15 @@ ...@@ -1135,15 +1134,15 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset loaded successfully\n" "Dataset loaded successfully with shape: (9465, 94)\n"
] ]
} }
], ],
"source": [ "source": [
"df = None\n", "df = None\n",
"df = pd.read_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n", "df = HELPERS.read_dataset_from_csv(CONSTANTS.PREPROCESSED_DATASET_PATH)\n",
"assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n", "assert df.shape == CONSTANTS.PREPROCESSED_DATASET_SHAPE, f\"Expected shape {CONSTANTS.PREPROCESSED_DATASET_SHAPE}, but got {df.shape}\" \n",
"print(f\"Dataset loaded successfully\")" "print(\"Dataset loaded successfully with shape:\", df.shape)"
] ]
}, },
{ {
...@@ -1162,7 +1161,7 @@ ...@@ -1162,7 +1161,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Execution time for Apriori: 11.543023109436035 seconds\n" "Execution time for Apriori: 11.037009716033936 seconds\n"
] ]
} }
], ],
...@@ -1193,7 +1192,7 @@ ...@@ -1193,7 +1192,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Execution time for FP Growth: 2.7200090885162354 seconds\n" "Execution time for FP Growth: 2.707993268966675 seconds\n"
] ]
} }
], ],
...@@ -1219,7 +1218,7 @@ ...@@ -1219,7 +1218,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"> **As we can notice, `FP Growth` is much faster than `Apriori` ***(about 5 times faster!)***.** \n", "> **As we can notice, `FP Growth` is much faster than `Apriori`** ***(about 5 times faster!)***. \n",
"> **This is because `Apriori` requires accessing the dataset multiple times to find frequent itemsets, whereas `FP Growth` builds its tree at the beginning and then doesn't access the dataset again (working only with the tree)**" "> **This is because `Apriori` requires accessing the dataset multiple times to find frequent itemsets, whereas `FP Growth` builds its tree at the beginning and then doesn't access the dataset again (working only with the tree)**"
] ]
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment