import pandas as pd

from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
import numpy as np

class HELPERS:
    """
    Namespace of helpers for frequent item set mining and association rules

    All helpers are static methods: they can be called on the class
    (``HELPERS.get_rules(...)``) as well as on an instance.
    """

    @staticmethod
    def _require(condition: bool, message: str) -> None:
        """
        Raise an AssertionError carrying ``message`` when ``condition`` is false

        A bare ``assert`` statement is removed when Python runs with ``-O``, which
        would silently disable the input validation. AssertionError is kept as the
        exception type so callers that already catch it are not affected.

        Args:
            condition (bool): Result of the validation check
            message (str): Error message used when the check fails

        Raises:
            AssertionError: If ``condition`` is false
        """
        if not condition:
            raise AssertionError(message)

    @staticmethod
    def _one_hot_to_transactions(data: pd.DataFrame) -> pd.DataFrame:
        """
        Convert a one-hot encoded DataFrame to the transaction layout passed to ECLAT

        Each input row becomes one output row listing the column labels whose
        value equals 1; shorter rows are padded with NaN.

        Args:
            data (pd.DataFrame): One-hot encoded data

        Returns:
            transactional_data (pd.DataFrame): One row per transaction, one column per item position

        Raises:
            ValueError: If ``data`` has no rows
        """
        # Keep, for every row, the labels of the columns whose value is 1
        transactions = [row.index[row == 1].tolist() for _, row in data.iterrows()]
        if not transactions:
            # max() below would fail with an unhelpful "empty sequence" ValueError
            raise ValueError("data must contain at least one row to use the 'eclat' algorithm")

        # The widest transaction determines the number of columns
        max_length = max(len(transaction) for transaction in transactions)
        return pd.DataFrame(transactions, columns=range(max_length)).fillna(np.nan)

    @staticmethod
    def read_dataset_from_csv(path: str) -> pd.DataFrame:
        """
        Read a dataset from a .csv file

        Reading is best-effort: when the file cannot be read, a message is printed
        and None is returned instead of raising.

        Args:
            path (str): Path to .csv file to be read

        Returns:
            data (pd.DataFrame): Pandas DataFrame containing the data from the CSV file,
                or None if the file could not be read

        Raises:
            AssertionError: If ``path`` is not a string
        """

        # Type checking
        HELPERS._require(isinstance(path, str), "path must be a string")

        # Handle exceptions
        try:
            return pd.read_csv(path)
        except FileNotFoundError:
            print(f"Error: file at {path} was not found")
        except pd.errors.EmptyDataError:
            print(f"Error: file at {path} is empty")
        except pd.errors.ParserError:
            print(f"Error: file at {path} could not be parsed")
        except Exception as e:
            print(f"Unexpected error occurred: {e}")

        # Reached only when one of the handlers above reported a failure
        return None

    @staticmethod
    def find_repeated_item_sets(algorithm: str, data: pd.DataFrame, min_support: float) -> pd.DataFrame:
        """
        Find repeated item sets in the given data using the specified algorithm

        Args:
            algorithm (str): Algorithm to use for finding repeated item sets. Must be one of
                'apriori', 'fpgrowth' or 'eclat'
            data (pd.DataFrame): Data in form of a pandas DataFrame (one-hot encoded)
            min_support (float): minimum support threshold for the item sets.

        Returns:
            repeated_item_sets (pd.DataFrame): DataFrame containing the repeated item sets found in the data.
                For 'apriori' and 'fpgrowth' the library result is returned unchanged. For 'eclat'
                the frame has the columns 'itemsets' (list of item names) and 'support'

        Raises:
            AssertionError: If an argument has the wrong type or value
            ValueError: If ``algorithm`` is 'eclat' and ``data`` has no rows
            RuntimeError: If the selected algorithm fails; the original exception is chained as the cause
        """

        # Type checking
        HELPERS._require(isinstance(algorithm, str), "Algorithm must be a string")
        HELPERS._require(
            algorithm in ("apriori", "fpgrowth", "eclat"),
            "Algorithm must be either 'apriori' or 'fpgrowth' or 'eclat'",
        )
        HELPERS._require(isinstance(data, pd.DataFrame), "Data must be a pandas DataFrame")
        HELPERS._require(
            isinstance(min_support, (int, float)) and min_support > 0,
            "min_support must be a positive number",
        )

        if algorithm == "eclat":
            # One separator for both joining (inside fit) and splitting the item set keys
            separator = ', '

            # Convert one-hot encoded DataFrame to transaction format to be able to use with Eclat module
            transactional_data = HELPERS._one_hot_to_transactions(data)
            eclat_finder = ECLAT(data=transactional_data)

            # Handle exceptions
            try:
                # The second value returned by fit is used as a mapping from the joined
                # item names of an item set to its support
                _, supports = eclat_finder.fit(min_support=min_support, separator=separator, verbose=False)
                return pd.DataFrame({
                    'itemsets': [key.split(separator) for key in supports],
                    'support': list(supports.values()),
                })
            except Exception as e:
                raise RuntimeError(f"An error occurred while finding repeated item sets: {str(e)}") from e

        repeated_item_sets_finder = apriori if algorithm == "apriori" else fpgrowth

        # Handle exceptions
        try:
            return repeated_item_sets_finder(data, min_support=min_support, use_colnames=True)
        except Exception as e:
            raise RuntimeError(f"An error occurred while finding repeated item sets: {str(e)}") from e

    @staticmethod
    def get_rules(repeated_item_sets: pd.DataFrame, min_confidence: float) -> pd.DataFrame:
        """
        Generate association rules from repeated item sets

        Args:
            repeated_item_sets (pd.DataFrame): DataFrame containing repeated item sets
            min_confidence (float): Minimum confidence threshold for the rules.

        Returns:
            rules (pd.DataFrame): DataFrame containing generated association rules

        Raises:
            AssertionError: If an argument has the wrong type or value
            RuntimeError: If rule generation fails; the original exception is chained as the cause
        """
        # Type checking
        HELPERS._require(
            isinstance(repeated_item_sets, pd.DataFrame),
            "repeated_item_sets must be a pandas DataFrame",
        )
        HELPERS._require(
            isinstance(min_confidence, (int, float)) and min_confidence > 0,
            "min_confidence must be a positive number",
        )

        # Handle exceptions
        try:
            return association_rules(repeated_item_sets, metric='confidence', min_threshold=min_confidence)
        except Exception as e:
            raise RuntimeError(f"An error occurred while generating association rules: {str(e)}") from e