You need to sign in or sign up before continuing.
Commit c8a34262 authored by Almouhannad Hafez

Add Eclat

parent df109ff0
This diff is collapsed.
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
import numpy as np
class HELPERS:
def read_dataset_from_csv(path: str) -> pd.DataFrame:
......@@ -38,7 +39,7 @@ class HELPERS:
Args:
algorithm (str): Algorithm to use for finding repeated item sets. Must be one of 'apriori', 'fpgrowth', or 'eclat'
data (pd.DataFrame): Data in form of a pandas DataFrame
data (pd.DataFrame): Data in form of a pandas DataFrame (one-hot encoded)
min_support (float): minimum support threshold for the item sets.
Returns:
......@@ -47,7 +48,7 @@ class HELPERS:
# Type checking
assert isinstance(algorithm, str), "Algorithm must be a string"
assert algorithm in ["apriori", "fpgrowth"], "Algorithm must be either 'apriori' or 'fpgrowth'"
assert algorithm in ["apriori", "fpgrowth", "eclat"], "Algorithm must be either 'apriori' or 'fpgrowth' or 'eclat'"
assert isinstance(data, pd.DataFrame), "Data must be a pandas DataFrame"
assert isinstance(min_support, (int, float)) and min_support > 0, "min_support must be a positive number"
......@@ -57,10 +58,37 @@ class HELPERS:
repeated_item_sets_finder = apriori
elif algorithm == "fpgrowth":
repeated_item_sets_finder = fpgrowth
elif algorithm == "eclat":
# Convert the one-hot encoded DataFrame to transaction format so it can be used with the ECLAT module
transactions = []
for _, row in data.iterrows():
transaction = row.index[row == 1].tolist() # Get items where the value is 1
transactions.append(transaction)
# Create a DataFrame from the transactions
max_length = max(len(x) for x in transactions) # Find the max length of transactions
transactional_data = pd.DataFrame(transactions, columns=range(max_length)).fillna(np.nan)
repeated_item_sets_finder = ECLAT(data = transactional_data)
# Handle exceptions
try:
repeated_item_sets = None
if algorithm == 'apriori' or algorithm == 'fpgrowth':
repeated_item_sets = repeated_item_sets_finder(data, min_support=min_support, use_colnames=True)
else:
_, repeated_item_sets = repeated_item_sets_finder.fit(min_support=min_support, separator=', ', verbose=False)
# Fix the result dictionary
# Initialize the new dictionary
fixed_dict = {'itemsets': [], 'support': []}
# Populate the new dictionary
for key, value in repeated_item_sets.items():
# Split the key into a list of items
itemset = key.split(', ')
# Append the itemset and its corresponding support value
fixed_dict['itemsets'].append(itemset)
fixed_dict['support'].append(value)
repeated_item_sets = pd.DataFrame(fixed_dict)
except Exception as e:
raise RuntimeError(f"An error occurred while finding repeated item sets: {str(e)}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment