#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>  
#include <stdbool.h>
#include <unistd.h>

/*
 * Element-wise integer sum reduction over a binary tree of ranks.
 *
 * Rank r receives partial sums from ranks 2r+1 and 2r+2 (when those ranks
 * exist in the communicator), adds its own send_data, and forwards the
 * result to rank (r-1)/2. Rank 0 is the root and ends up with the total.
 *
 * send_data    - this rank's contribution, count ints; only read.
 * recv_data    - on rank 0: output buffer of count ints that receives the
 *                final sums (must not overlap send_data). Ignored on every
 *                other rank, which uses a private accumulator instead.
 * count        - number of ints per rank; ranks are expected to pass the
 *                same value. Nothing is done when count <= 0.
 * communicator - communicator whose ranks form the tree.
 *
 * If a buffer cannot be allocated (or rank 0 passes a NULL recv_data) the
 * whole job is terminated via MPI_Abort.
 */
void reduce_tree(int* send_data, int* recv_data, int count, MPI_Comm communicator)
{
    int my_rank, com_size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &com_size);

    if (count <= 0) {
        return;
    }

    const size_t bytes = (size_t)count * sizeof(int);
    const int children[2] = { 2 * my_rank + 1, 2 * my_rank + 2 };
    const int has_children = children[0] < com_size;

    /* The root accumulates straight into the caller's buffer; every other
     * rank needs a private accumulator that is released before returning. */
    int* accum = (my_rank == 0) ? recv_data : (int*)malloc(bytes);

    /* One scratch buffer is reused for all incoming child messages. */
    int* child_data = has_children ? (int*)malloc(bytes) : NULL;

    if (accum == NULL || (has_children && child_data == NULL)) {
        fprintf(stderr, "reduce_tree: rank %d has no usable buffer for %d ints\n",
                my_rank, count);
        MPI_Abort(communicator, EXIT_FAILURE);
        /* Only reached if MPI_Abort returns; release what this call owns. */
        free(child_data);
        if (my_rank != 0) {
            free(accum);
        }
        return;
    }

    /* Start from this rank's own contribution... */
    memcpy(accum, send_data, bytes);

    /* ...then fold in the partial sum of each existing child subtree. */
    for (int c = 0; c < 2; c++) {
        if (children[c] >= com_size) {
            continue;
        }
        MPI_Recv(child_data, count, MPI_INT, children[c], 0, communicator, MPI_STATUS_IGNORE);
        for (int j = 0; j < count; j++) {
            accum[j] += child_data[j];
        }
    }
    free(child_data);

    /* Non-root ranks pass their subtree's sum one level up the tree. */
    if (my_rank != 0) {
        const int parent = (my_rank - 1) / 2;
        MPI_Send(accum, count, MPI_INT, parent, 0, communicator);
        free(accum);
    }
}


/*
 * Reference element-wise integer sum reduction: rank 0 gathers every rank's
 * array with MPI_Gather and adds them up one rank at a time.
 *
 * send_data    - this rank's contribution, count ints.
 * recv_data    - on rank 0: output buffer of count ints that receives the
 *                sums. Not touched on other ranks.
 * count        - number of ints contributed by each rank.
 * communicator - communicator to reduce over; rank 0 is the root.
 *
 * If rank 0 cannot allocate the gather buffer the job is terminated via
 * MPI_Abort.
 */
void reduce_sequential(
    int* send_data,
    int* recv_data,
    int count,
    MPI_Comm communicator)
{
    int my_rank;
    int com_size;
    MPI_Comm_rank(communicator, &my_rank);
    MPI_Comm_size(communicator, &com_size);

    const int is_root = (my_rank == 0);

    int* gather_buffer = NULL;
    if (is_root && count > 0)
    {
        /* Sizes are computed in size_t so count * com_size cannot overflow int. */
        gather_buffer = (int*) calloc((size_t)count * (size_t)com_size, sizeof(int));
        if (gather_buffer == NULL)
        {
            fprintf(stderr, "reduce_sequential: cannot allocate gather buffer for %d x %d ints\n",
                    com_size, count);
            MPI_Abort(communicator, EXIT_FAILURE);
            return;
        }
    }

    MPI_Gather(send_data, count, MPI_INT, gather_buffer, count, MPI_INT, 0, communicator);

    if (is_root && count > 0)
    {
        memset(recv_data, 0, (size_t)count * sizeof(int));
        for (int p = 0; p < com_size; p++)
        {
            /* Rank p's contribution occupies the p-th count-sized slice. */
            const int* slice = gather_buffer + (size_t)p * (size_t)count;
            for (int i = 0; i < count; i++)
                recv_data[i] += slice[i];
        }
    }
    free(gather_buffer);
}



/*
 * Benchmark driver: for array sizes 10, 100, ..., 10000000 every rank fills
 * an array with its own rank number, then the tree reduction and the
 * gather-based reduction are each timed with MPI_Wtime after a barrier.
 * Rank 0 prints both timings and reports every index at which the two
 * results differ.
 */
int main(int argc, char** args)
{
    MPI_Init(&argc, &args);
    const int max_value = 10000000;

    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    for (int count = 10; count <= max_value; count *= 10) {
        const size_t bytes = (size_t)count * sizeof(int);

        /* Only the root needs result buffers; they stay NULL elsewhere. */
        int* recv_array_tree = NULL;
        int* recv_array_sequential = NULL;
        if (my_rank == 0) {
            recv_array_tree = (int*) malloc(bytes);
            recv_array_sequential = (int*) malloc(bytes);
        }
        int* send_array = (int*) malloc(bytes);

        const int root_alloc_failed =
            (my_rank == 0) && (recv_array_tree == NULL || recv_array_sequential == NULL);
        if (send_array == NULL || root_alloc_failed) {
            fprintf(stderr, "rank %d: out of memory for array size %d\n", my_rank, count);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }

        for (int i = 0; i < count; i++)
            send_array[i] = my_rank;

        /* Barrier first so all ranks enter the timed region together. */
        MPI_Barrier(MPI_COMM_WORLD);
        double tree_time = -MPI_Wtime();
        reduce_tree(send_array, recv_array_tree, count, MPI_COMM_WORLD);
        tree_time += MPI_Wtime();

        MPI_Barrier(MPI_COMM_WORLD);
        double sequential_time = -MPI_Wtime();
        reduce_sequential(send_array, recv_array_sequential, count, MPI_COMM_WORLD);
        sequential_time += MPI_Wtime();

        if (my_rank == 0) {
            printf("\narray size for each process: %d \n", count);
            printf("reduce tree time is: %lf\n", tree_time);
            printf("reduce sequential time is: %lf\n\n", sequential_time);
            printf("************************************\n\n");

            /* The gather-based result serves as the reference. */
            for (int i = 0; i < count; i++) {
                if (recv_array_tree[i] != recv_array_sequential[i]) {
                    printf("At index %i: reduce_tree is %i, reduce_sequential is %i\n",
                        i, recv_array_tree[i], recv_array_sequential[i]);
                }
            }
        }

        /* free(NULL) is a no-op, so non-root ranks can take the same path. */
        free(recv_array_tree);
        free(recv_array_sequential);
        free(send_array);
    }

    MPI_Finalize();
    return 0;
}


