Commit b8d18d92 authored by amira.alkatably

Initial commit

parents
master
slave1 user=mpiuser
slave2 user=mpiuser
#include <stdio.h>
#include <mpi.h>
static long num_steps = 100000; // Number of midpoint-rule subintervals on [0, 1]
double step;                    // Width of one subinterval, set in main()

/*
 * Approximates the integral of x*x over [0, 1] with the midpoint rule,
 * splitting the subintervals across all MPI ranks.
 *
 * Each rank sums its own contiguous range of subintervals; the partial sums
 * are combined on rank 0 with MPI_Reduce, and rank 0 prints the result and
 * the elapsed wall-clock time measured with MPI_Wtime.
 *
 * Returns 0 on completion.
 */
int main(int argc, char* argv[]) {
    int rank, size;
    double sum = 0.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    double start_time = MPI_Wtime(); // Start timing

    step = 1.0 / (double)num_steps;

    // Work with `long` throughout so the partition matches the type of
    // num_steps and does not truncate if the step count is raised.
    long local_steps = num_steps / size;
    long extra_steps = num_steps % size; // Remainder, spread over the lowest ranks

    // The first `extra_steps` ranks each take one additional subinterval.
    long start = rank * local_steps + (rank < extra_steps ? rank : extra_steps);
    long end = (rank + 1) * local_steps + (rank < extra_steps ? rank + 1 : extra_steps);

    for (long i = start; i < end; i++) {
        double x = (i + 0.5) * step; // Midpoint of subinterval i
        sum += step * x * x;
    }

    double local_sum = sum;
    double global_sum = 0.0; // Only meaningful on rank 0 after the reduction
    MPI_Reduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    double end_time = MPI_Wtime(); // Stop timing

    if (rank == 0) {
        printf("Approximate value of the integral: %lf\n", global_sum);
        printf("Time taken: %lf seconds\n", end_time - start_time);
    }

    MPI_Finalize();
    return 0;
}
#include <stdio.h>
#include <mpi.h>
#include <omp.h>
/* Integrand: returns the square of x. */
double f(double x) {
    const double squared = x * x;
    return squared;
}
int main(int argc, char *argv[]) {
int rank, size;
int num_steps = 1000000; // adjust as needed
double step, sum, pi;
double start_time, end_time;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int local_steps = num_steps / size;
int extra_steps = num_steps % size;
int start = rank * local_steps + (rank < extra_steps ? rank : extra_steps);
int end = (rank + 1) * local_steps + (rank < extra_steps ? rank + 1 : extra_steps);
step = 1.0 / (double)num_steps;
sum = 0.0;
start_time = MPI_Wtime();
#pragma omp parallel for reduction(+:sum)
for (int i = start; i < end; i++) {
double x = (i + 0.5) * step;
sum += f(x);
}
MPI_Reduce(&sum, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
end_time = MPI_Wtime();
if (rank == 0) {
pi *= step;
printf("Approximate value of the integral: %lf\n", pi);
printf("Time taken: %lf seconds\n", end_time - start_time);
}
MPI_Finalize();
return 0;
}
#include <stdio.h>
#include <omp.h>
static long num_steps = 100000; // Number of midpoint-rule subintervals
double step;                    // Width of one subinterval, set in main()

/*
 * Approximates the integral of x*x over [0, 1] with the midpoint rule,
 * using an OpenMP parallel loop with a sum reduction, then prints the
 * result and the elapsed wall-clock time.
 */
int main() {
    double total = 0.0;

    const double t_begin = omp_get_wtime(); // Start timing

    step = 1.0 / (double)num_steps;

    #pragma omp parallel for reduction(+:total)
    for (int k = 0; k < num_steps; k++) {
        // Declared inside the loop body, so each thread has its own copy.
        const double mid = (k + 0.5) * step;
        total += step * mid * mid;
    }

    const double result = total;
    const double t_finish = omp_get_wtime(); // Stop timing

    printf("Approximate value of the integral: %lf\n", result);
    printf("Time taken: %lf seconds\n", t_finish - t_begin);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#define N 10000
/*
 * Element-wise vector addition: out[i] = a[i] + b[i] for 0 <= i < n.
 *
 * Launch layout: 1D grid of 1D blocks. Any grid/block size is valid; each
 * thread starts at its global index and advances by the total number of
 * launched threads until it passes n, so launches with fewer threads than
 * elements still cover the whole range.
 *
 * out, a, b: pointers dereferenced on the device, each indexed up to n - 1.
 * n:         number of elements to process.
 * Uses no shared memory.
 */
__global__ void vector_add(float *out, float *a, float *b, int n) {
    // 64-bit index and stride so neither the block offset nor the increment
    // can overflow for large launches or n close to INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        out[i] = a[i] + b[i];
    }
}
// Evaluates a CUDA runtime call; on failure prints the error string with the
// source location to stderr and terminates the program.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(err_));                   \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Host driver: fills two N-element vectors, adds them on the GPU with
 * vector_add, copies the result back, and checks it against a host-side sum.
 *
 * Returns 0 when every element matches, EXIT_FAILURE on allocation failure
 * or result mismatch. CUDA runtime errors terminate via CUDA_CHECK.
 */
int main() {
    const size_t bytes = sizeof(float) * N;
    float *d_a = NULL, *d_b = NULL, *d_out = NULL;

    // Allocate host memory
    float *a = (float*)malloc(bytes);
    float *b = (float*)malloc(bytes);
    float *out = (float*)malloc(bytes);
    if (a == NULL || b == NULL || out == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(a);
        free(b);
        free(out);
        return EXIT_FAILURE;
    }

    // Initialize host arrays
    for (int i = 0; i < N; i++) {
        a[i] = (float)(i + 1);
        b[i] = 26.0f;
    }

    // Allocate device memory
    CUDA_CHECK(cudaMalloc((void**)&d_a, bytes));
    CUDA_CHECK(cudaMalloc((void**)&d_b, bytes));
    CUDA_CHECK(cudaMalloc((void**)&d_out, bytes));

    // Transfer data from host to device memory
    CUDA_CHECK(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice));

    // One thread per element, rounded up to a whole number of blocks
    const int blockSize = 256;
    const int numBlocks = (N + blockSize - 1) / blockSize;

    // Executing kernel with multiple blocks
    vector_add<<<numBlocks, blockSize>>>(d_out, d_a, d_b, N);
    // A launch returns no status itself; query it explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Transfer data back to host memory (blocking copy, so the kernel has
    // finished by the time this returns; execution errors surface here)
    CUDA_CHECK(cudaMemcpy(out, d_out, bytes, cudaMemcpyDeviceToHost));

    // Verification: the inputs are small integers, so the float sums are
    // exact and can be compared for equality.
    int mismatches = 0;
    for (int i = 0; i < N; i++) {
        if (out[i] != a[i] + b[i]) {
            if (mismatches == 0) {
                fprintf(stderr, "Mismatch at index %d: got %f, expected %f\n",
                        i, out[i], a[i] + b[i]);
            }
            mismatches++;
        }
    }

    // Deallocate device memory
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_out));

    // Deallocate host memory
    free(a);
    free(b);
    free(out);

    return mismatches == 0 ? 0 : EXIT_FAILURE;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment