Commit 71dd1e0d authored by drnull03

Solved second question

parent 4f59304b
from mpi4py import MPI
import numpy as np
import sys


def reduce_tree(sendbuf, root=0):
    """
    Tree-based reduce (sum) for a 1D numpy array.
    Each rank receives partial sums from its children in a binary tree
    rooted at rank 0, adds them to its own data, and forwards the result
    to its parent. Returns the result only at root; other ranks return None.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    local = sendbuf.copy()
    # Children of rank r in the binary tree are 2r+1 and 2r+2.
    left = 2 * rank + 1
    right = 2 * rank + 2
    parent = (rank - 1) // 2 if rank != root else None
    # Receive and accumulate the partial sum from each existing child.
    if left < size:
        tmp = np.empty_like(local)
        comm.Recv(tmp, source=left, tag=0)
        local += tmp
    if right < size:
        tmp = np.empty_like(local)
        comm.Recv(tmp, source=right, tag=0)
        local += tmp
    if rank != root:
        # Forward the partial sum up the tree.
        comm.Send(local, dest=parent, tag=0)
        return None
    else:
        return local


def sequential_reduce(all_data):
    return np.sum(all_data, axis=0)


if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if len(sys.argv) != 2:
        if rank == 0:
            print("Usage: python reduce.py <array_size>")
        sys.exit(0)

    N = int(sys.argv[1])
    # Seed per rank so every process generates a different array.
    np.random.seed(rank + 1)
    local_array = np.random.randint(0, 10, size=N)

    # Time the tree-based parallel reduce between two barriers.
    comm.Barrier()
    t1 = MPI.Wtime()
    result = reduce_tree(local_array)
    comm.Barrier()
    t2 = MPI.Wtime()
    parallel_time = t2 - t1

    # Gather all arrays at root and time a sequential reduce for comparison.
    all_data = comm.gather(local_array, root=0)
    if rank == 0:
        t3 = MPI.Wtime()
        seq_result = sequential_reduce(np.array(all_data))
        t4 = MPI.Wtime()
        sequential_time = t4 - t3

        print(f"Processes: {size}")
        print(f"Array size: {N}")
        print(f"Parallel reduce time: {parallel_time:.6f} s")
        print(f"Sequential reduce time: {sequential_time:.6f} s")
        correct = np.array_equal(seq_result, result)
        print(f"Correct: {correct}")
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Tree-based reduce (sum): each rank receives partial sums from its
   children in a binary tree rooted at rank 0, adds them in, and sends
   the result to its parent. The result lands in recvbuf at root only. */
void reduce_tree(int *sendbuf, int *recvbuf, int count, int root, MPI_Comm comm) {
    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    int *local = (int*)malloc(count * sizeof(int));
    memcpy(local, sendbuf, count * sizeof(int));

    /* Children of rank r in the binary tree are 2r+1 and 2r+2. */
    int left = 2*rank + 1;
    int right = 2*rank + 2;
    int parent = (rank == root) ? -1 : (rank - 1)/2;
    MPI_Status status;

    /* Receive and accumulate the partial sum from each existing child. */
    if (left < size) {
        int *tmp = (int*)malloc(count * sizeof(int));
        MPI_Recv(tmp, count, MPI_INT, left, 0, comm, &status);
        for (int i = 0; i < count; i++) local[i] += tmp[i];
        free(tmp);
    }
    if (right < size) {
        int *tmp = (int*)malloc(count * sizeof(int));
        MPI_Recv(tmp, count, MPI_INT, right, 0, comm, &status);
        for (int i = 0; i < count; i++) local[i] += tmp[i];
        free(tmp);
    }

    if (rank != root)
        MPI_Send(local, count, MPI_INT, parent, 0, comm);   /* forward up the tree */
    else
        memcpy(recvbuf, local, count * sizeof(int));
    free(local);
}

/* Reference implementation: sum the gathered arrays on one process. */
void sequential_reduce(int *all_data, int num_procs, int count, int *result) {
    for (int i = 0; i < count; i++) result[i] = 0;
    for (int p = 0; p < num_procs; p++)
        for (int i = 0; i < count; i++)
            result[i] += all_data[p*count + i];
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (argc != 2) {
        if (rank == 0) printf("Usage: %s <array_size>\n", argv[0]);
        MPI_Finalize();
        return 0;
    }

    int N = atoi(argv[1]);
    int *sendbuf = (int*)malloc(N * sizeof(int));
    /* Seed per rank so every process generates a different array. */
    srand(rank + 1);
    for (int i = 0; i < N; i++) sendbuf[i] = rand() % 10;
    int *recvbuf = (rank == 0) ? (int*)malloc(N * sizeof(int)) : NULL;

    /* Time the tree-based parallel reduce between two barriers. */
    MPI_Barrier(MPI_COMM_WORLD);
    double t1 = MPI_Wtime();
    reduce_tree(sendbuf, recvbuf, N, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    double t2 = MPI_Wtime();
    double parallel_time = t2 - t1;

    /* Gather all arrays at root and time a sequential reduce for comparison. */
    int *all_data = NULL;
    if (rank == 0) all_data = (int*)malloc((size_t)size * N * sizeof(int));
    MPI_Gather(sendbuf, N, MPI_INT, all_data, N, MPI_INT, 0, MPI_COMM_WORLD);

    double t3 = MPI_Wtime();
    int *seq_result = NULL;
    if (rank == 0) {
        seq_result = (int*)malloc(N * sizeof(int));
        sequential_reduce(all_data, size, N, seq_result);
    }
    double t4 = MPI_Wtime();
    double sequential_time = t4 - t3;

    if (rank == 0) {
        printf("Processes: %d\n", size);
        printf("Array size: %d\n", N);
        printf("Parallel reduce time: %f s\n", parallel_time);
        printf("Sequential reduce time: %f s\n", sequential_time);
        /* Verify the tree result against the sequential result. */
        int correct = 1;
        for (int i = 0; i < N; i++) if (seq_result[i] != recvbuf[i]) { correct = 0; break; }
        printf("Correct: %s\n", correct ? "True" : "False");
        free(all_data);
        free(seq_result);
        free(recvbuf);
    }
    free(sendbuf);
    MPI_Finalize();
    return 0;
}
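The C version builds with any MPI toolchain, e.g. mpicc -O2 reduce.c -o reduce, then mpirun -np 8 ./reduce 1000000. Since the platform files below are SimGrid descriptions, the program can also be run under SimGrid's SMPI; a sketch, with the source and host file names assumed:

smpicc -O2 reduce.c -o reduce
smpirun -np 16 -platform cluster_backbone.xml -hostfile hostfile.txt ./reduce 1000000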
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!--            _________
                 |         |
                 |  router |
      ___________|_________|___________________ backbone link
     |     |     |                 |      |
   l0|   l1|   l2|   ........   l98|   l99|
     |     |     |                 |      |
  node-0.simgrid.org    ...    node-99.simgrid.org

       The route from node-0 to node-2 is: l0.UP ; backbone ; l2.DOWN
       The route from node-0 to the outer world begins with: l0.UP ; backbone
  -->
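  <!-- Per SimGrid's cluster attributes: each node computes at 1 Gflop/s,
       each leaf link offers 125 MBps at 50 us latency, and the shared
       backbone offers 2.25 GBps at 500 us latency. -->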
<cluster id="cluster0" prefix="node-" radical="0-99" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
bb_bw="2.25GBps" bb_lat="500us"/>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!--       _________
            |         |
            | crossbar|
            |_________|
            /    |    \
           /     |     \
        l0/    l1|      \l2
         /       |       \
        /        |        \
    node-0    node-1    node-2  ...

       All hosts can communicate at full speed with no interference on
       the crossbar; only each host's own link is limiting.
  -->
<zone id="world" routing="Full">
<cluster id="cluster-crossbar"
prefix="node-" radical="0-65536" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"/>
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="world" routing="Full">
<cluster id="bob_cluster" topology="DRAGONFLY" topo_parameters="3,4;4,3;5,1;2"
prefix="node-" radical="0-119" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
loopback_bw="100MBps" loopback_lat="0" limiter_link="150MBps"/>
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!-- This is an example of a fat-tree cluster.
       It is taken from Figure 1(b) of the paper "D-Mod-K Routing Providing
       Non-Blocking Traffic for Shift Permutations on Real Life Fat Trees",
       available at http://webee.eedev.technion.ac.il/wp-content/uploads/2014/08/publication_574.pdf
       It defines a two-level fat tree, with 4 leaf switches connected to
       4 nodes each and 2 core switches connected to each leaf switch by
       two cables. -->
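  <!-- Per SimGrid's fat-tree syntax, topo_parameters="2;4,4;1,2;1,2" reads:
       2 levels; 4 children per switch at each level; 1 parent per node and
       2 parents per leaf switch; 1 and 2 parallel cables respectively,
       i.e. 4*4 = 16 nodes, matching radical="0-15". -->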
<zone id="world" routing="Full">
<cluster id="bob_cluster"
prefix="node-" radical="0-15" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
loopback_bw="100MBps" loopback_lat="0" />
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="world" routing="Full">
<cluster id="bob_cluster" topology="TORUS" topo_parameters="3,2,2"
prefix="node-" radical="0-11" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
loopback_bw="100MBps" loopback_lat="0"/>
</zone>
</platform>
<?xml version="1.0"?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
<zone id="cluster" routing="Full">
<!-- Define hosts -->
<host id="master" speed="10Gf"/>
<host id="slave1" speed="10Gf"/>
<host id="slave2" speed="10Gf"/>
<!-- Define network link -->
<link id="network" bandwidth="1GBps" latency="0.1ms"/>
<!-- Connect hosts: every pair of hosts communicates over the single shared link -->
<route src="master" dst="slave1"><link_ctn id="network"/></route>
<route src="master" dst="slave2"><link_ctn id="network"/></route>
<route src="slave1" dst="slave2"><link_ctn id="network"/></route>
</zone>
</platform>
https://git.hiast.edu.sy/diaa.hanna/mpi_prefix_sum