Commit 76f853fa authored by sahar.moalla

final_commit

parent a3e18f29
#include <stdio.h>
#include <stdlib.h>        // for malloc and free
#include <cuda_runtime.h>

#define N 10000
#define BLOCK_SIZE 1024

// Grid-stride loop: each thread handles every (blockDim.x * gridDim.x)-th element
__global__ void vector_add(float *out, float *a, float *b, int n) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    int stride = blockDim.x * gridDim.x;
    for (int i = tid; i < n; i += stride) {
        out[i] = a[i] + b[i];
    }
}

int main() {
    float *a, *b, *out;
    float *d_a, *d_b, *d_out;

    // Allocate host memory
    a   = (float*)malloc(sizeof(float) * N);
    b   = (float*)malloc(sizeof(float) * N);
    out = (float*)malloc(sizeof(float) * N);

    // Initialize host arrays
    for (int i = 0; i < N; i++) {
        a[i] = i + 1;
        b[i] = 26;
    }

    // Allocate device memory
    cudaMalloc((void**)&d_a, sizeof(float) * N);
    cudaMalloc((void**)&d_b, sizeof(float) * N);
    cudaMalloc((void**)&d_out, sizeof(float) * N);

    // Transfer data from host to device memory
    cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);

    // Choose the grid dimensions so that all N elements are covered
    int grid_size = (N + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Execute the kernel with multiple blocks
    vector_add<<<grid_size, BLOCK_SIZE>>>(d_out, d_a, d_b, N);

    // Transfer the result back to host memory (implicitly synchronizes with the kernel)
    cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);

    // Verification
    // for (int i = 0; i < N; i++) {
    //     printf("%f\n", out[i]);
    // }

    // Deallocate device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_out);

    // Deallocate host memory
    free(a);
    free(b);
    free(out);

    return 0;
}
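The CUDA runtime calls and the kernel launch above are not checked for errors. A minimal, self-contained sketch of one way to add such checks (the CHECK_CUDA macro name is an illustrative choice, not part of the original code; it relies only on the standard runtime calls cudaGetErrorString, cudaGetLastError and cudaDeviceSynchronize):

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Illustrative helper: abort with a message if a CUDA runtime call fails
#define CHECK_CUDA(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error: %s at %s:%d\n",                   \
                    cudaGetErrorString(err_), __FILE__, __LINE__);         \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

int main(void) {
    // Check that a device can be selected before doing any real work
    CHECK_CUDA(cudaSetDevice(0));

    // Around a kernel launch, the same pattern would be:
    //   vector_add<<<grid_size, BLOCK_SIZE>>>(d_out, d_a, d_b, N);
    //   CHECK_CUDA(cudaGetLastError());        // launch-time errors
    //   CHECK_CUDA(cudaDeviceSynchronize());   // execution-time errors
    printf("CUDA device 0 selected successfully\n");
    return 0;
}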
# Makefile for MPI program

# Source file
SRC = calculate_area.c

# Number of processes
NP = 4

# Compiler
CC = mpicc

# Executable name
EXE = ./out

all: $(EXE)

$(EXE): $(SRC)
	$(CC) -o $(EXE) $(SRC)

clean:
	rm -f $(EXE)

run:
	mpirun -np $(NP) -f mpihost $(EXE)
#include <stdio.h>
#include <stdlib.h> // for malloc and free
#include <mpi.h>

// Calculate the partial area under the curve x^2 for a given range,
// using a cyclic distribution of subintervals across MPI processes
double calculate_partial_area(double start, double end, int num_steps, int rank, int size) {
    double width = (end - start) / num_steps;
    double partial_area = 0.0;

    // Loop over the subintervals assigned to the current process
    for (int i = rank; i < num_steps; i += size) {
        double x = start + i * width;
        double height = x * x;
        partial_area += height * width;
    }
    return partial_area;
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int num_steps = 1000000; // Adjust based on the required precision

    // Measure execution time
    double start_time = MPI_Wtime();

    // Calculate the partial area for the current process
    double partial_area = calculate_partial_area(0.0, 1.0, num_steps, rank, size);

    double *all_partial_areas = NULL;
    if (rank == 0) {
        // Allocate memory for storing partial areas at the root process
        all_partial_areas = (double *)malloc(size * sizeof(double));
    }

    // Gather the partial areas from all processes to the root process
    MPI_Gather(&partial_area, 1, MPI_DOUBLE, all_partial_areas, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    // Stop measuring time
    double end_time = MPI_Wtime();
    double elapsed_time = end_time - start_time;

    if (rank == 0) {
        // Sum up the partial areas to get the total area
        double total_area = 0.0;
        for (int i = 0; i < size; i++) {
            total_area += all_partial_areas[i];
        }

        // Print the results
        printf("Total Area (MPI): %f\n", total_area);
        printf("Time: %f seconds\n", elapsed_time);

        // Free the allocated memory at the root process
        free(all_partial_areas);
    }

    MPI_Finalize();
    return 0;
}
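The gather-and-sum step above can equivalently be expressed with a single collective reduction. A minimal sketch of that alternative, computing the same integral of x^2 over [0, 1] but letting MPI_Reduce sum the per-process contributions directly on rank 0:

#include <stdio.h>
#include <mpi.h>

// Sketch: same cyclic integration of x^2 over [0,1], but combining the
// per-process results with MPI_Reduce instead of MPI_Gather plus a manual sum
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int num_steps = 1000000;
    double width = 1.0 / num_steps;
    double partial_area = 0.0;

    // Cyclic distribution of subintervals, as in the original code
    for (int i = rank; i < num_steps; i += size) {
        double x = i * width;
        partial_area += x * x * width;
    }

    // Sum all partial areas directly into total_area on rank 0
    double total_area = 0.0;
    MPI_Reduce(&partial_area, &total_area, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        printf("Total Area (MPI_Reduce): %f\n", total_area);
    }

    MPI_Finalize();
    return 0;
}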
master
slave1 user=mpiuser
slave2 user=mpiuser
# Makefile for OpenMP program

# Source file
SRC = calculate_area.c

# Compiler
CC = gcc

# Executable name
EXE = ./out

all: $(EXE)

$(EXE): $(SRC)
	$(CC) -fopenmp -o $(EXE) $(SRC)

clean:
	rm -f $(EXE)

run:
	$(EXE)
#include <stdio.h>
#include <omp.h>

// Calculate the area under the curve x^2 for a given range using OpenMP
double calculate_partial_area(double start, double end, int num_steps) {
    double width = (end - start) / num_steps;
    double partial_area = 0.0;

    // Parallel region with a private partial area for each thread
    #pragma omp parallel
    {
        double private_partial_area = 0.0;

        // Parallel loop: the iterations are divided among the threads
        #pragma omp for
        for (int i = 0; i < num_steps; i++) {
            double x = start + i * width;
            double height = x * x;
            private_partial_area += height * width;
        }

        // Combine the per-thread partial areas using a critical section
        #pragma omp critical
        partial_area += private_partial_area;
    }
    return partial_area;
}

int main() {
    int num_steps = 1000000; // Adjust based on the required precision

    // Measure execution time
    double start_time = omp_get_wtime();

    // Calculate the total area using OpenMP parallelization
    double total_area = calculate_partial_area(0.0, 1.0, num_steps);

    // Stop measuring time
    double end_time = omp_get_wtime();
    double elapsed_time = end_time - start_time;

    // Print the results
    printf("Total Area (OpenMP): %f\n", total_area);
    printf("Time: %f seconds\n", elapsed_time);

    return 0;
}
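The private accumulator plus critical section above can also be written with OpenMP's reduction clause, which performs the per-thread accumulation and the final combine automatically. A minimal sketch of that variant for the same integral:

#include <stdio.h>
#include <omp.h>

// Sketch: integrate x^2 over [0,1] with a combined parallel-for reduction,
// letting OpenMP handle per-thread accumulation and the final sum
int main() {
    int num_steps = 1000000;
    double width = 1.0 / num_steps;
    double total_area = 0.0;

    #pragma omp parallel for reduction(+:total_area)
    for (int i = 0; i < num_steps; i++) {
        double x = i * width;
        total_area += x * x * width;
    }

    printf("Total Area (OpenMP reduction): %f\n", total_area);
    return 0;
}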
# Makefile for MPI + OpenMP program

# Source file
SRC = calculate_area.c

# Number of processes
NP = 4

# Compiler
CC = mpicc

# Executable name
EXE = ./out

all: $(EXE)

$(EXE): $(SRC)
	$(CC) -o $(EXE) -fopenmp $(SRC)

clean:
	rm -f $(EXE)
# Create a default hostfile with the local hostname if none exists
# (edit mpihost by hand to list the cluster nodes)
mpihost:
	hostname > mpihost

run: mpihost
	mpirun -np $(NP) -hostfile mpihost $(EXE)
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <omp.h>

// Calculate the partial area under the curve x^2 for a given range,
// distributing subintervals cyclically over all (process, thread) pairs
double calculate_partial_area(double start, double end, int num_steps, int rank, int size) {
    double width = (end - start) / num_steps;
    double partial_area = 0.0;

    // Use OpenMP parallelization within each MPI process
    #pragma omp parallel reduction(+:partial_area)
    {
        // Get the number of threads and the ID of the current thread
        int num_threads = omp_get_num_threads();
        int thread_id = omp_get_thread_num();

        // Loop over the subintervals assigned to this thread within this MPI process
        for (int i = thread_id + rank * num_threads; i < num_steps; i += size * num_threads) {
            double x = start + i * width;
            double height = x * x;
            partial_area += height * width;
        }
    }
    return partial_area;
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int num_steps = 1000000; // Adjust based on the required precision

    // Measure execution time
    double start_time = MPI_Wtime();

    // Calculate the partial area for the current MPI process using OpenMP parallelization
    double partial_area = calculate_partial_area(0.0, 1.0, num_steps, rank, size);

    double *all_partial_areas = NULL;
    if (rank == 0) {
        // Allocate memory for storing partial areas at the root process
        all_partial_areas = (double *)malloc(size * sizeof(double));
    }

    // Gather the partial areas from all MPI processes to the root process
    MPI_Gather(&partial_area, 1, MPI_DOUBLE, all_partial_areas, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    // Stop measuring time
    double end_time = MPI_Wtime();
    double elapsed_time = end_time - start_time;

    if (rank == 0) {
        // Sum up the partial areas to get the total area
        double total_area = 0.0;
        for (int i = 0; i < size; i++) {
            total_area += all_partial_areas[i];
        }

        // Print the results
        printf("Total Area (MPI + OpenMP): %f\n", total_area);
        printf("Time: %f seconds\n", elapsed_time);

        // Free the allocated memory at the root process
        free(all_partial_areas);
    }

    MPI_Finalize();
    return 0;
}
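The index arithmetic in calculate_partial_area assumes that every MPI process runs the same number of OpenMP threads (for example by exporting the same OMP_NUM_THREADS on all nodes); otherwise some subintervals would be skipped or counted twice. A small self-contained sketch that replays the loop bounds and checks this coverage (SIZE, NUM_THREADS and NUM_STEPS are illustrative values, not taken from the code above):

#include <stdio.h>

// Sketch: verify that the cyclic (rank, thread) index mapping used above
// visits every subinterval exactly once when all processes run the same
// number of threads
#define SIZE 4          // assumed number of MPI processes
#define NUM_THREADS 2   // assumed OpenMP threads per process, identical everywhere
#define NUM_STEPS 20    // small step count so the coverage is easy to inspect

int main(void) {
    int count[NUM_STEPS] = {0};

    // Replay the loop bounds from calculate_partial_area for every worker
    for (int rank = 0; rank < SIZE; rank++) {
        for (int thread_id = 0; thread_id < NUM_THREADS; thread_id++) {
            for (int i = thread_id + rank * NUM_THREADS; i < NUM_STEPS; i += SIZE * NUM_THREADS) {
                count[i]++;
            }
        }
    }

    // Every index should have been visited exactly once
    int ok = 1;
    for (int i = 0; i < NUM_STEPS; i++) {
        if (count[i] != 1) ok = 0;
    }
    printf("coverage %s\n", ok ? "OK: each subinterval handled exactly once" : "BROKEN");
    return 0;
}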
master
master
master
master