#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdbool.h>
#include <unistd.h>

/**
 * Computes the inclusive prefix sum of an array that is distributed in
 * rank-ordered blocks over the processes of `communicator`.
 *
 * Each process scans its own block, rank 0 gathers the per-block totals,
 * turns them into per-rank offsets (the sum of all preceding blocks) and
 * scatters them back; every process then shifts its local scan by its offset.
 *
 * This is a collective operation: every process of `communicator` must call it.
 * Rank 0 additionally prints the gathered block totals to stdout.
 *
 * @param block_array   this process's input block (`block_size` ints, read only)
 * @param block_size    number of elements in this process's block
 * @param block_prefix  output block (`block_size` ints); may be the same
 *                      pointer as `block_array` for an in-place scan
 * @param communicator  communicator whose rank order defines the block order
 */
void prefix_mpi(int* block_array, int block_size, int* block_prefix, MPI_Comm communicator)
{
    int my_rank;
    int com_size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &com_size);

    /* Local inclusive scan, written straight into the output block. */
    int block_total = 0;
    for (int i = 0; i < block_size; i++) {
        block_total += block_array[i];
        block_prefix[i] = block_total;
    }

    /* Only the root needs a buffer; the other ranks pass NULL to the
       collectives, which ignore the root-side buffer on non-root ranks. */
    int* block_sums = NULL;
    if (my_rank == 0) {
        block_sums = (int*)malloc((size_t)com_size * sizeof *block_sums);
        if (block_sums == NULL) {
            fprintf(stderr, "prefix_mpi: out of memory\n");
            /* Abort the whole job: the other ranks would otherwise block
               forever in the collectives below. */
            MPI_Abort(communicator, EXIT_FAILURE);
        }
    }

    MPI_Gather(&block_total, 1, MPI_INT, block_sums, 1, MPI_INT, 0, communicator);

    if (my_rank == 0) {
        printf("Process %d: received Sums: ", my_rank);
        for (int i = 0; i < com_size; i++) {
            printf("%d ", block_sums[i]);
        }
        printf("\n");

        /* Exclusive scan in place: slot i becomes the sum of the totals of
           ranks 0..i-1, i.e. the offset rank i has to add to its local scan. */
        int running = 0;
        for (int i = 0; i < com_size; i++) {
            int total = block_sums[i];
            block_sums[i] = running;
            running += total;
        }
    }

    int offset = 0;
    MPI_Scatter(block_sums, 1, MPI_INT, &offset, 1, MPI_INT, 0, communicator);

    for (int i = 0; i < block_size; i++) {
        block_prefix[i] += offset;
    }

    free(block_sums); /* NULL on non-root ranks; free(NULL) is a no-op */
}


/**
 * Test driver for prefix_mpi().
 *
 * Rank 0 fills an array with pseudo-random values in 0..10, the array is
 * scattered in equal blocks, prefix_mpi() computes the distributed inclusive
 * prefix sum, and the blocks are gathered back on rank 0. Rank 0 then prints
 * the per-process results and compares every element against a sequentially
 * computed prefix sum.
 */
int main(int argc, char** args)
{
    MPI_Init(&argc, &args);

    int my_rank;
    int com_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &com_size);

    int total_array_size = 32;

    /* Round up to a multiple of the process count so every rank gets a block
       of the same size (the scatter/gather below use one fixed count). */
    if (total_array_size % com_size != 0)
        total_array_size = (total_array_size / com_size + 1) * com_size;

    int block_size = total_array_size / com_size;
    int* total_array = NULL;
    int* total_prefix = NULL;

    if (my_rank == 0)
    {
        total_array = (int*)malloc((size_t)total_array_size * sizeof *total_array);
        total_prefix = (int*)malloc((size_t)total_array_size * sizeof *total_prefix);
        if (total_array == NULL || total_prefix == NULL)
        {
            fprintf(stderr, "main: out of memory\n");
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        for (int i = 0; i < total_array_size; i++)
            total_array[i] = rand() % 11;

        printf("Total Array: ");
        for (int i = 0; i < total_array_size; i++)
            printf("%d ", total_array[i]);
        printf("\n\n");
    }

    int* block_array = (int*)malloc((size_t)block_size * sizeof *block_array);
    int* block_prefix = (int*)malloc((size_t)block_size * sizeof *block_prefix);
    if (block_array == NULL || block_prefix == NULL)
    {
        fprintf(stderr, "main: out of memory\n");
        /* Abort everywhere; a single failing rank would otherwise leave the
           others stuck in the collectives below. */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    MPI_Scatter(total_array, block_size, MPI_INT,
        block_array, block_size, MPI_INT, 0, MPI_COMM_WORLD);

    prefix_mpi(block_array, block_size, block_prefix, MPI_COMM_WORLD);

    MPI_Gather(block_prefix, block_size, MPI_INT,
        total_prefix, block_size, MPI_INT, 0, MPI_COMM_WORLD);

    if (my_rank == 0)
    {
        printf("Block Prefixes:\n");
        for (int i = 0; i < com_size; i++)
        {
            printf("Process %d: ", i);
            for (int j = 0; j < block_size; j++)
                printf("%d ", total_prefix[i * block_size + j]);
            printf("\n");
        }

        /* Sequential reference: check every element, including the last one
           (the grand total), and report the index that was actually compared. */
        int accum = 0;
        for (int i = 0; i < total_array_size; i++)
        {
            accum += total_array[i];
            if (total_prefix[i] != accum)
                printf("Error at index %i: %i expected, %i computed\n", i, accum, total_prefix[i]);
        }

        printf("Test completed!\n");
    }

    /* The total_* buffers are NULL on non-root ranks; free(NULL) is a no-op. */
    free(total_array);
    free(total_prefix);
    free(block_array);
    free(block_prefix);

    MPI_Finalize();

    return 0;
}