Commit c8a34262 authored by Almouhannad Hafez

Add Eclat

parent df109ff0
This diff is collapsed.
import pandas as pd import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from mlxtend.frequent_patterns import fpgrowth from pyECLAT import ECLAT
import numpy as np
class HELPERS: class HELPERS:
def read_dataset_from_csv(path: str) -> pd.DataFrame: def read_dataset_from_csv(path: str) -> pd.DataFrame:
...@@ -38,7 +39,7 @@ class HELPERS: ...@@ -38,7 +39,7 @@ class HELPERS:
Args: Args:
algorithm (str): Algorithm to use for finding repeated item sets. Must be either 'apriori' or 'fpgrowth' algorithm (str): Algorithm to use for finding repeated item sets. Must be one of 'apriori', 'fpgrowth' or 'eclat'
data (pd.DataFrame): Data in form of a pandas DataFrame data (pd.DataFrame): Data in form of a pandas DataFrame (one-hot encoded)
min_support (float): minimum support threshold for the item sets. min_support (float): minimum support threshold for the item sets.
Returns: Returns:
...@@ -47,7 +48,7 @@ class HELPERS: ...@@ -47,7 +48,7 @@ class HELPERS:
# Type checking # Type checking
assert isinstance(algorithm, str), "Algorithm must be a string" assert isinstance(algorithm, str), "Algorithm must be a string"
assert algorithm in ["apriori", "fpgrowth"], "Algorithm must be either 'apriori' or 'fpgrowth'" assert algorithm in ["apriori", "fpgrowth", "eclat"], "Algorithm must be either 'apriori' or 'fpgrowth' or 'eclat'"
assert isinstance(data, pd.DataFrame), "Data must be a pandas DataFrame" assert isinstance(data, pd.DataFrame), "Data must be a pandas DataFrame"
assert isinstance(min_support, (int, float)) and min_support > 0, "min_support must be a positive number" assert isinstance(min_support, (int, float)) and min_support > 0, "min_support must be a positive number"
...@@ -57,10 +58,37 @@ class HELPERS: ...@@ -57,10 +58,37 @@ class HELPERS:
repeated_item_sets_finder = apriori repeated_item_sets_finder = apriori
elif algorithm == "fpgrowth": elif algorithm == "fpgrowth":
repeated_item_sets_finder = fpgrowth repeated_item_sets_finder = fpgrowth
elif algorithm == "eclat":
# Convert one-hot encoded DataFrame to transaction format to be able to use with Eclat module
transactions = []
for _, row in data.iterrows():
transaction = row.index[row == 1].tolist() # Get items where the value is 1
transactions.append(transaction)
# Create a DataFrame from the transactions
max_length = max(len(x) for x in transactions) # Find the max length of transactions
transactional_data = pd.DataFrame(transactions, columns=range(max_length)).fillna(np.nan)
repeated_item_sets_finder = ECLAT(data = transactional_data)
# Handle exceptions # Handle exceptions
try: try:
repeated_item_sets = repeated_item_sets_finder(data, min_support=min_support, use_colnames=True) repeated_item_sets = None
if algorithm == 'apriori' or algorithm == 'fpgrowth':
repeated_item_sets = repeated_item_sets_finder(data, min_support=min_support, use_colnames=True)
else:
_, repeated_item_sets = repeated_item_sets_finder.fit(min_support=min_support, separator=', ', verbose=False)
# Fix the result dictionary
# Initialize the new dictionary
fixed_dict = {'itemsets': [], 'support': []}
# Populate the new dictionary
for key, value in repeated_item_sets.items():
# Split the key into a list of items
itemset = key.split(', ')
# Append the itemset and its corresponding support value
fixed_dict['itemsets'].append(itemset)
fixed_dict['support'].append(value)
repeated_item_sets = pd.DataFrame(fixed_dict)
except Exception as e: except Exception as e:
raise RuntimeError(f"An error occurred while finding repeated item sets: {str(e)}") raise RuntimeError(f"An error occurred while finding repeated item sets: {str(e)}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment