#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>  
#include <stdbool.h>

/**
 * Computes the inclusive prefix sum of an array that is split into one block
 * per rank of `communicator`, with blocks ordered by rank.
 *
 * This is a collective operation: it calls MPI_Gather and MPI_Bcast on
 * `communicator`, so every rank of that communicator has to call it.
 *
 * @param block_array   This rank's input block (block_size ints, read only).
 * @param block_size    Number of elements in this rank's block. A value <= 0
 *                      is treated as an empty block that contributes 0.
 * @param block_prefix  Output buffer of block_size ints. On return,
 *                      block_prefix[i] holds the sum of every element owned
 *                      by lower ranks plus block_array[0..i].
 * @param communicator  Communicator the blocks are distributed over.
 */
void prefix_mpi(int* block_array, int block_size, int* block_prefix, MPI_Comm communicator)
{
    int my_rank, com_size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &com_size);

    // Local inclusive scan, written straight into the caller's output buffer
    // so no scratch array is needed. The loop body never runs for an empty
    // block, which leaves local_total at 0 and touches no memory.
    int local_total = 0;
    for (int i = 0; i < block_size; i++) {
        local_total += block_array[i];
        block_prefix[i] = local_total;
    }

    // One slot per rank: first the per-rank block totals, later their
    // inclusive scan.
    int* block_totals = malloc((size_t)com_size * sizeof *block_totals);
    if (block_totals == NULL) {
        fprintf(stderr, "prefix_mpi: rank %d could not allocate %d ints\n", my_rank, com_size);
        // The function returns void and the other ranks are about to enter
        // the collectives below, so there is no way to report the failure
        // and carry on; terminate this rank instead of dereferencing NULL.
        abort();
    }

    // Collect every rank's block total on rank 0.
    MPI_Gather(&local_total, 1, MPI_INT, block_totals, 1, MPI_INT, 0, communicator);

    // Rank 0 turns the totals into an inclusive scan over the ranks.
    if (my_rank == 0) {
        int running = 0;
        for (int r = 0; r < com_size; r++) {
            running += block_totals[r];
            block_totals[r] = running;
        }
    }

    // Share the scanned totals with every rank.
    MPI_Bcast(block_totals, com_size, MPI_INT, 0, communicator);

    // block_totals[my_rank] includes this rank's own total; subtracting it
    // leaves the sum of all elements owned by lower ranks.
    // NOTE(review): MPI_Exscan with MPI_SUM would deliver this offset directly
    // and could replace the gather / scan / broadcast sequence.
    const int offset = block_totals[my_rank] - local_total;
    for (int i = 0; i < block_size; i++) {
        block_prefix[i] += offset;
    }

    free(block_totals);
}


/**
 * Test driver for prefix_mpi.
 *
 * Rank 0 fills an array with values in [0, 10], scatters equal-sized blocks
 * to all ranks of MPI_COMM_WORLD, every rank runs prefix_mpi on its block,
 * and rank 0 gathers the results, prints them and compares them against a
 * sequentially computed inclusive prefix sum.
 *
 * @return 0 in all cases; mismatches are only reported on stdout.
 */
int main(int argc, char** args)
{
    // Initialize MPI
    MPI_Init(&argc, &args);

    int my_rank;
    int com_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &com_size);

    // Start from 64 elements and round up to the next multiple of com_size,
    // because MPI_Scatter below sends the same element count to every rank.
    int total_array_size = 64;
    if (total_array_size % com_size != 0)
        total_array_size = (total_array_size / com_size + 1) * com_size;

    // Number of elements handled by each rank
    int block_size = total_array_size / com_size;

    // Full input and full result; only allocated on rank 0
    int* total_array = NULL;
    int* total_prefix = NULL;

    // Rank 0 creates and prints the input data
    if (my_rank == 0)
    {
        total_array = malloc((size_t)total_array_size * sizeof *total_array);
        total_prefix = malloc((size_t)total_array_size * sizeof *total_prefix);
        if (total_array == NULL || total_prefix == NULL)
        {
            fprintf(stderr, "Rank 0: could not allocate the total arrays\n");
            abort();
        }

        // rand() is not seeded in this function
        for (int i = 0; i < total_array_size; i++)
            total_array[i] = rand() % 11;

        // Display the generated total array
        printf("Total Array: ");
        for (int i = 0; i < total_array_size; i++)
            printf("%d ", total_array[i]);
        printf("\n\n");
    }

    // Per-rank input block and its prefix sums
    int* block_array = malloc((size_t)block_size * sizeof *block_array);
    int* block_prefix = malloc((size_t)block_size * sizeof *block_prefix);
    if (block_array == NULL || block_prefix == NULL)
    {
        fprintf(stderr, "Rank %d: could not allocate the block arrays\n", my_rank);
        abort();
    }

    // Hand each rank its block of the total array
    MPI_Scatter(total_array, block_size, MPI_INT,
        block_array, block_size, MPI_INT, 0, MPI_COMM_WORLD);

    // Compute the global prefix sums for this rank's block
    prefix_mpi(block_array, block_size, block_prefix, MPI_COMM_WORLD);

    // Collect all block results, in rank order, on rank 0
    MPI_Gather(block_prefix, block_size, MPI_INT,
        total_prefix, block_size, MPI_INT, 0, MPI_COMM_WORLD);

    if (my_rank == 0)
    {
        // Display the complete prefix array
        printf("total Prefixes:\n");
        for (int j = 0; j < total_array_size; j++)
            printf("%d ", total_prefix[j]);
        printf("\n");

        // Display the same values grouped by the rank that produced them
        printf("\n for each process :\n");
        for (int i = 0; i < com_size; i++)
        {
            printf("Process %d: ", i);
            for (int j = 0; j < block_size; j++)
                printf("%d ", total_prefix[i * block_size + j]);
            printf("\n");
        }

        // Validate every element, including the last one, against a
        // sequential inclusive prefix sum; the reported index is the index
        // of the mismatching element.
        int accum = 0;
        for (int i = 0; i < total_array_size; i++)
        {
            accum += total_array[i];
            if (total_prefix[i] != accum)
                printf("Error at index %i: %i expected, %i computed\n", i, accum, total_prefix[i]);
        }

        printf("Test completed!\n");
        free(total_array);
        free(total_prefix);
    }

    // Free allocated memory for block arrays
    free(block_array);
    free(block_prefix);

    // Finalize MPI
    MPI_Finalize();

    return 0;
}
