Commit 71dd1e0d authored by drnull03

Solved second question

parent 4f59304b
from mpi4py import MPI
import numpy as np
import sys


def reduce_tree(sendbuf, root=0):
    """
    Tree-based reduce (sum) for a 1D numpy array.
    Each rank receives partial sums from its children in a binary tree
    rooted at rank 0, adds them to its own data, and forwards the result
    to its parent. Returns the result only at root; other ranks return None.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    local = sendbuf.copy()
    # Children of rank r in the binary tree are 2r+1 and 2r+2.
    left = 2 * rank + 1
    right = 2 * rank + 2
    parent = (rank - 1) // 2 if rank != root else None
    # Receive and accumulate the partial sum from each existing child.
    if left < size:
        tmp = np.empty_like(local)
        comm.Recv(tmp, source=left, tag=0)
        local += tmp
    if right < size:
        tmp = np.empty_like(local)
        comm.Recv(tmp, source=right, tag=0)
        local += tmp
    if rank != root:
        # Forward the partial sum up the tree.
        comm.Send(local, dest=parent, tag=0)
        return None
    else:
        return local


def sequential_reduce(all_data):
    return np.sum(all_data, axis=0)


if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if len(sys.argv) != 2:
        if rank == 0:
            print("Usage: python reduce.py <array_size>")
        sys.exit(0)

    N = int(sys.argv[1])
    # Seed per rank so every process generates a different array.
    np.random.seed(rank + 1)
    local_array = np.random.randint(0, 10, size=N)

    # Time the tree-based parallel reduce between two barriers.
    comm.Barrier()
    t1 = MPI.Wtime()
    result = reduce_tree(local_array)
    comm.Barrier()
    t2 = MPI.Wtime()
    parallel_time = t2 - t1

    # Gather all arrays at root and time a sequential reduce for comparison.
    all_data = comm.gather(local_array, root=0)
    if rank == 0:
        t3 = MPI.Wtime()
        seq_result = sequential_reduce(np.array(all_data))
        t4 = MPI.Wtime()
        sequential_time = t4 - t3

        print(f"Processes: {size}")
        print(f"Array size: {N}")
        print(f"Parallel reduce time: {parallel_time:.6f} s")
        print(f"Sequential reduce time: {sequential_time:.6f} s")
        correct = np.array_equal(seq_result, result)
        print(f"Correct: {correct}")
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Tree-based reduce (sum): each rank receives partial sums from its
   children in a binary tree rooted at rank 0, adds them in, and sends
   the result to its parent. The result lands in recvbuf at root only. */
void reduce_tree(int *sendbuf, int *recvbuf, int count, int root, MPI_Comm comm) {
    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    int *local = (int*)malloc(count * sizeof(int));
    memcpy(local, sendbuf, count * sizeof(int));

    /* Children of rank r in the binary tree are 2r+1 and 2r+2. */
    int left = 2*rank + 1;
    int right = 2*rank + 2;
    int parent = (rank == root) ? -1 : (rank - 1)/2;
    MPI_Status status;

    /* Receive and accumulate the partial sum from each existing child. */
    if (left < size) {
        int *tmp = (int*)malloc(count * sizeof(int));
        MPI_Recv(tmp, count, MPI_INT, left, 0, comm, &status);
        for (int i = 0; i < count; i++) local[i] += tmp[i];
        free(tmp);
    }
    if (right < size) {
        int *tmp = (int*)malloc(count * sizeof(int));
        MPI_Recv(tmp, count, MPI_INT, right, 0, comm, &status);
        for (int i = 0; i < count; i++) local[i] += tmp[i];
        free(tmp);
    }

    if (rank != root)
        MPI_Send(local, count, MPI_INT, parent, 0, comm);   /* forward up the tree */
    else
        memcpy(recvbuf, local, count * sizeof(int));
    free(local);
}

/* Reference implementation: sum the gathered arrays on one process. */
void sequential_reduce(int *all_data, int num_procs, int count, int *result) {
    for (int i = 0; i < count; i++) result[i] = 0;
    for (int p = 0; p < num_procs; p++)
        for (int i = 0; i < count; i++)
            result[i] += all_data[p*count + i];
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (argc != 2) {
        if (rank == 0) printf("Usage: %s <array_size>\n", argv[0]);
        MPI_Finalize();
        return 0;
    }

    int N = atoi(argv[1]);
    int *sendbuf = (int*)malloc(N * sizeof(int));
    /* Seed per rank so every process generates a different array. */
    srand(rank + 1);
    for (int i = 0; i < N; i++) sendbuf[i] = rand() % 10;
    int *recvbuf = (rank == 0) ? (int*)malloc(N * sizeof(int)) : NULL;

    /* Time the tree-based parallel reduce between two barriers. */
    MPI_Barrier(MPI_COMM_WORLD);
    double t1 = MPI_Wtime();
    reduce_tree(sendbuf, recvbuf, N, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    double t2 = MPI_Wtime();
    double parallel_time = t2 - t1;

    /* Gather all arrays at root and time a sequential reduce for comparison. */
    int *all_data = NULL;
    if (rank == 0) all_data = (int*)malloc((size_t)size * N * sizeof(int));
    MPI_Gather(sendbuf, N, MPI_INT, all_data, N, MPI_INT, 0, MPI_COMM_WORLD);

    double t3 = MPI_Wtime();
    int *seq_result = NULL;
    if (rank == 0) {
        seq_result = (int*)malloc(N * sizeof(int));
        sequential_reduce(all_data, size, N, seq_result);
    }
    double t4 = MPI_Wtime();
    double sequential_time = t4 - t3;

    if (rank == 0) {
        printf("Processes: %d\n", size);
        printf("Array size: %d\n", N);
        printf("Parallel reduce time: %f s\n", parallel_time);
        printf("Sequential reduce time: %f s\n", sequential_time);
        /* Verify the tree result against the sequential result. */
        int correct = 1;
        for (int i = 0; i < N; i++) if (seq_result[i] != recvbuf[i]) { correct = 0; break; }
        printf("Correct: %s\n", correct ? "True" : "False");
        free(all_data);
        free(seq_result);
        free(recvbuf);
    }
    free(sendbuf);
    MPI_Finalize();
    return 0;
}
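The C version builds with any MPI toolchain, e.g. mpicc -O2 reduce.c -o reduce, then mpirun -np 8 ./reduce 1000000. Since the platform files below are SimGrid descriptions, the program can also be run under SimGrid's SMPI; a sketch, with the source and host file names assumed:

smpicc -O2 reduce.c -o reduce
smpirun -np 16 -platform cluster_backbone.xml -hostfile hostfile.txt ./reduce 1000000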
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!--            _________
                 |         |
                 |  router |
      ___________|_________|___________________ backbone link
     |     |     |                 |      |
   l0|   l1|   l2|   ........   l98|   l99|
     |     |     |                 |      |
  node-0.simgrid.org    ...    node-99.simgrid.org

       The route from node-0 to node-2 is: l0.UP ; backbone ; l2.DOWN
       The route from node-0 to the outer world begins with: l0.UP ; backbone
  -->
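  <!-- Per SimGrid's cluster attributes: each node computes at 1 Gflop/s,
       each leaf link offers 125 MBps at 50 us latency, and the shared
       backbone offers 2.25 GBps at 500 us latency. -->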
<cluster id="cluster0" prefix="node-" radical="0-99" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
bb_bw="2.25GBps" bb_lat="500us"/>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!--       _________
            |         |
            | crossbar|
            |_________|
            /    |    \
           /     |     \
        l0/    l1|      \l2
         /       |       \
        /        |        \
    node-0    node-1    node-2  ...

       All hosts can communicate at full speed with no interference on
       the crossbar; only each host's own link is limiting.
  -->
<zone id="world" routing="Full">
<cluster id="cluster-crossbar"
prefix="node-" radical="0-65536" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"/>
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="world" routing="Full">
<cluster id="bob_cluster" topology="DRAGONFLY" topo_parameters="3,4;4,3;5,1;2"
prefix="node-" radical="0-119" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
loopback_bw="100MBps" loopback_lat="0" limiter_link="150MBps"/>
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <!-- This is an example of a fat-tree cluster.
       It is taken from Figure 1(b) of the paper "D-Mod-K Routing Providing
       Non-Blocking Traffic for Shift Permutations on Real Life Fat Trees",
       available at http://webee.eedev.technion.ac.il/wp-content/uploads/2014/08/publication_574.pdf
       It defines a two-level fat tree, with 4 leaf switches connected to
       4 nodes each and 2 core switches connected to each leaf switch by
       two cables. -->
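  <!-- Per SimGrid's fat-tree syntax, topo_parameters="2;4,4;1,2;1,2" reads:
       2 levels; 4 children per switch at each level; 1 parent per node and
       2 parents per leaf switch; 1 and 2 parallel cables respectively,
       i.e. 4*4 = 16 nodes, matching radical="0-15". -->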
<zone id="world" routing="Full">
<cluster id="bob_cluster"
prefix="node-" radical="0-15" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
loopback_bw="100MBps" loopback_lat="0" />
</zone>
</platform>
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="world" routing="Full">
<cluster id="bob_cluster" topology="TORUS" topo_parameters="3,2,2"
prefix="node-" radical="0-11" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
loopback_bw="100MBps" loopback_lat="0"/>
</zone>
</platform>
<?xml version="1.0"?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
<zone id="cluster" routing="Full">
<!-- Define hosts -->
<host id="master" speed="10Gf"/>
<host id="slave1" speed="10Gf"/>
<host id="slave2" speed="10Gf"/>
<!-- Define network link -->
<link id="network" bandwidth="1GBps" latency="0.1ms"/>
<!-- Connect hosts: every pair of hosts communicates over the single shared link -->
<route src="master" dst="slave1"><link_ctn id="network"/></route>
<route src="master" dst="slave2"><link_ctn id="network"/></route>
<route src="slave1" dst="slave2"><link_ctn id="network"/></route>
</zone>
</platform>
https://git.hiast.edu.sy/diaa.hanna/mpi_prefix_sum