#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdbool.h>

// Sum-reduce `count` ints from every rank of `communicator` onto rank 0 using
// a binary tree of blocking point-to-point messages.
//
//   send_data    - this rank's contribution (`count` ints); only read.
//   recv_data    - on rank 0, receives the element-wise sum over all ranks;
//                  not accessed on any other rank.
//   count        - number of ints contributed by each rank.
//   communicator - communicator whose ranks take part in the reduction.
//
// At the tree level with stride `step`, a rank that is a multiple of 2*step
// receives a partial sum from rank + step (if that rank exists) and adds it to
// its own; any other rank still in the loop sends its partial sum to
// rank - step and drops out. The stride doubles every level, so rank 0 holds
// the total once step >= size.
//
// An allocation failure is reported on stderr and ends the process with
// abort(), because the function has no way to return an error.
void reduce_tree(
    int* send_data,
    int* recv_data,
    int count,
    MPI_Comm communicator)
{
    int my_rank;
    int size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &size);

    // Always request at least one element so that a NULL return unambiguously
    // means failure, even when count is 0.
    const size_t elems = count > 0 ? (size_t)count : 0;
    const size_t alloc_elems = elems > 0 ? elems : 1;

    // Running partial sum, seeded with this rank's own contribution
    int* local_result = malloc(alloc_elems * sizeof *local_result);
    if (local_result == NULL)
    {
        // Returning would leave the partner rank blocked in MPI_Recv.
        // NOTE(review): relies on the MPI launcher terminating the remaining
        // ranks when one dies; MPI_Abort(communicator, ...) is the MPI-native
        // alternative.
        fprintf(stderr, "reduce_tree: rank %d: out of memory\n", my_rank);
        abort();
    }
    if (elems > 0)
    {
        memcpy(local_result, send_data, elems * sizeof *local_result);
    }

    // Receive buffer: allocated on the first receive and reused on every later
    // level, so ranks that only send never allocate it.
    int* received_data = NULL;

    int step = 1;
    while (step < size)
    {
        if (my_rank % (2 * step) != 0)
        {
            // This rank is a sender at this level: hand the partial sum to
            // its parent and leave the tree.
            int destination = my_rank - step;
            MPI_Send(local_result, count, MPI_INT, destination, 0, communicator);
            break;
        }

        int source = my_rank + step;
        if (source < size)
        {
            if (received_data == NULL)
            {
                received_data = malloc(alloc_elems * sizeof *received_data);
                if (received_data == NULL)
                {
                    // See the note on the first allocation above
                    fprintf(stderr, "reduce_tree: rank %d: out of memory\n", my_rank);
                    abort();
                }
            }

            // Receive the child's partial sum and fold it into ours
            MPI_Recv(received_data, count, MPI_INT, source, 0, communicator, MPI_STATUS_IGNORE);
            for (size_t i = 0; i < elems; i++)
            {
                local_result[i] += received_data[i];
            }
        }

        step *= 2;
    }

    // Only the root publishes the result
    if (my_rank == 0 && elems > 0)
    {
        memcpy(recv_data, local_result, elems * sizeof *local_result);
    }

    free(received_data);
    free(local_result);
}

// Sum-reduce `count` ints from every rank of `communicator` onto rank 0 by
// gathering all contributions on rank 0 and adding them up there.
//
//   send_data    - this rank's contribution (`count` ints); passed to
//                  MPI_Gather as the send buffer.
//   recv_data    - on rank 0, receives the element-wise sum over all ranks;
//                  not accessed on any other rank.
//   count        - number of ints contributed by each rank.
//   communicator - communicator whose ranks take part in the reduction.
//
// An allocation failure on rank 0 is reported on stderr and ends the process
// with abort(), because the function has no way to return an error.
void reduce_sequential(
    int* send_data,
    int* recv_data,
    int count,
    MPI_Comm communicator)
{
    int my_rank;
    int com_size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &com_size);

    // Sizes and offsets are computed in size_t so that count * com_size cannot
    // overflow int.
    const size_t elems = count > 0 ? (size_t)count : 0;
    const size_t ranks = com_size > 0 ? (size_t)com_size : 0;

    int* gather_buffer = NULL;
    if (my_rank == 0)
    {
        // Room for one block of `count` ints per rank. At least one element is
        // requested so that NULL unambiguously means failure.
        const size_t total = elems * ranks;
        gather_buffer = calloc(total > 0 ? total : 1, sizeof *gather_buffer);
        if (gather_buffer == NULL)
        {
            // Returning would leave the other ranks waiting in MPI_Gather.
            // NOTE(review): relies on the MPI launcher terminating the
            // remaining ranks when one dies; MPI_Abort(communicator, ...) is
            // the MPI-native alternative.
            fprintf(stderr, "reduce_sequential: rank %d: out of memory\n", my_rank);
            abort();
        }
    }

    // Gather data from all processes to the root process; the receive
    // arguments are only set up on rank 0.
    MPI_Gather(send_data, count, MPI_INT, gather_buffer, count, MPI_INT, 0, communicator);

    if (my_rank == 0)
    {
        // Start from zero, then add each rank's block element by element
        if (elems > 0)
        {
            memset(recv_data, 0, elems * sizeof *recv_data);
        }

        for (size_t p = 0; p < ranks; p++)
        {
            const int* block = gather_buffer + p * elems;
            for (size_t i = 0; i < elems; i++)
            {
                recv_data[i] += block[i];
            }
        }

        free(gather_buffer);
    }
}

// Driver: every rank fills an array of `count` ints with its own rank number,
// the arrays are reduced onto rank 0 once with reduce_tree and once with
// reduce_sequential, and rank 0 prints both results side by side together
// with the wall-clock time it spent in each call.
//
// Indices where the two reductions agree are printed on stdout; indices where
// they disagree are reported on stderr and make rank 0 return EXIT_FAILURE.
int main(int argc, char** args)
{
    MPI_Init(&argc, &args);
    int count = 40;
    int* recv_array_tree = NULL;
    int* recv_array_sequential = NULL;

    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    if (my_rank == 0)
    {
        // Result arrays are only needed on the root process
        recv_array_tree = malloc(count * sizeof *recv_array_tree);
        recv_array_sequential = malloc(count * sizeof *recv_array_sequential);
    }

    // Input array on every process, filled with the process's own rank
    int* send_array = malloc(count * sizeof *send_array);

    if (send_array == NULL ||
        (my_rank == 0 && (recv_array_tree == NULL || recv_array_sequential == NULL)))
    {
        // Carrying on without this rank would leave the others waiting for it
        // inside the reductions.
        // NOTE(review): relies on the MPI launcher terminating the remaining
        // ranks when one dies; MPI_Abort(MPI_COMM_WORLD, ...) is the
        // MPI-native alternative.
        fprintf(stderr, "rank %d: out of memory\n", my_rank);
        abort();
    }

    for (int i = 0; i < count; i++)
    {
        send_array[i] = my_rank;
    }

    // Time the tree-based reduction (measured locally on each rank)
    double start_parallel = MPI_Wtime();
    reduce_tree(send_array, recv_array_tree, count, MPI_COMM_WORLD);
    double end_parallel = MPI_Wtime();

    // Time the gather-based reduction (measured locally on each rank)
    double start_sequential = MPI_Wtime();
    reduce_sequential(send_array, recv_array_sequential, count, MPI_COMM_WORLD);
    double end_sequential = MPI_Wtime();

    int mismatches = 0;
    if (my_rank == 0)
    {
        // Compare the two results index by index
        for (int i = 0; i < count; i++)
        {
            if (recv_array_tree[i] == recv_array_sequential[i])
            {
                printf("At index %i: reduce_tree is %i, reduce_sequential is %i\n",
                    i, recv_array_tree[i], recv_array_sequential[i]);
            }
            else
            {
                // A disagreement means at least one reduction is wrong
                fprintf(stderr, "MISMATCH at index %i: reduce_tree is %i, reduce_sequential is %i\n",
                    i, recv_array_tree[i], recv_array_sequential[i]);
                mismatches++;
            }
        }

        // Only rank 0's timings are reported
        printf("Parallel Algorithm Time: %f seconds\n", end_parallel - start_parallel);
        printf("Sequential Algorithm Time: %f seconds\n", end_sequential - start_sequential);

        // Free allocated memory for result arrays on root process
        free(recv_array_tree);
        free(recv_array_sequential);
    }

    // Free allocated memory for input array on each process
    free(send_array);
    MPI_Finalize();
    return mismatches == 0 ? 0 : EXIT_FAILURE;
}
