#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Allocate an array of `count` ints, aborting the whole MPI job on failure.
 *
 * count: number of elements wanted; values <= 0 are rounded up to 1 so the
 *        returned pointer is never NULL for an empty chunk (malloc(0) is
 *        allowed to return NULL, which would be indistinguishable from an
 *        out-of-memory condition).
 * what:  short description of the buffer, used only in the error message.
 * rank:  rank of the calling process, used only in the error message.
 *
 * Returns a pointer the caller must release with free().
 */
static int *alloc_ints_or_abort(int count, const char *what, int rank) {
    size_t n = count > 0 ? (size_t)count : 1;
    int *p = malloc(n * sizeof *p);
    if (p == NULL) {
        fprintf(stderr, "Process %d: failed to allocate %s\n", rank, what);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE); /* in case MPI_Abort returns */
    }
    return p;
}

/*
 * Rank 0 fills an array of N random integers (0-99), distributes it across
 * all ranks as evenly as possible with MPI_Scatterv, each rank prints its
 * chunk and its local average, and rank 0 prints the global average.
 *
 * Returns 0 on success; aborts the MPI job if an allocation fails.
 */
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int world_rank, world_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    const int N = 1000; /* total number of elements in the array */
    int *X = NULL;      /* the full array; only allocated on rank 0 */
    int elements_per_proc = N / world_size;
    int remainder = N % world_size; /* leftover elements when N is not a multiple of world_size */

    /* The first `remainder` ranks each take one extra element. */
    int local_size = elements_per_proc + (world_rank < remainder ? 1 : 0);
    int *local_X = alloc_ints_or_abort(local_size, "local chunk", world_rank);

    /* Rank 0 generates the array, seeding the generator from the clock so
     * every run produces different data. */
    if (world_rank == 0) {
        X = alloc_ints_or_abort(N, "source array", world_rank);
        srand((unsigned)time(NULL));
        for (int i = 0; i < N; i++) {
            X[i] = rand() % 100; /* random numbers 0-99 */
        }
    }

    /* Per-rank element counts and starting offsets for MPI_Scatterv.
     * Scatterv is used instead of Scatter because the chunks are uneven
     * whenever there is a remainder. */
    int *counts = alloc_ints_or_abort(world_size, "counts", world_rank);
    int *displs = alloc_ints_or_abort(world_size, "displacements", world_rank);
    int offset = 0;
    for (int i = 0; i < world_size; i++) {
        counts[i] = elements_per_proc + (i < remainder ? 1 : 0);
        displs[i] = offset;
        offset += counts[i];
    }

    /* Distribute the array: rank r receives counts[r] elements starting at
     * X[displs[r]]. */
    MPI_Scatterv(X, counts, displs, MPI_INT, local_X, local_size, MPI_INT, 0, MPI_COMM_WORLD);

    /* Print received elements. */
    printf("Process %d received: ", world_rank);
    for (int i = 0; i < local_size; i++) {
        printf("%d ", local_X[i]);
    }
    printf("\n");

    /* Compute the local sum and, when the chunk is non-empty, the local average. */
    double local_sum = 0;
    for (int i = 0; i < local_size; i++) {
        local_sum += local_X[i];
    }

    if (local_size > 0) {
        double local_avg = local_sum / local_size;
        printf("Process %d local average = %f\n", world_rank, local_avg);
    } else {
        /* More ranks than elements: this rank received nothing, so an
         * average is undefined (dividing would print NaN). */
        printf("Process %d local average = n/a (no elements)\n", world_rank);
    }

    /* Global average: reduce the local *sums* (not the local averages) and
     * divide by N. Averaging the averages would be wrong when chunk sizes
     * differ. */
    double global_sum = 0;
    MPI_Reduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (world_rank == 0) {
        double global_avg = global_sum / N;
        printf("Global average = %f\n", global_avg);
    }

    free(local_X);
    free(counts);
    free(displs);
    free(X); /* NULL on non-root ranks; free(NULL) is a no-op */

    MPI_Finalize();
    return 0;
}
