Commit 3fd570ea authored by mohamadbashar.disoki's avatar mohamadbashar.disoki

OpenMP CUDA

parents
# Makefile
# Builds, runs and cleans the OpenMP example program.

# Source file
SRC = helloWorld.c
# Compiler
CC = gcc
# Executable name
EXE = ./out

# These targets name actions, not files, so make must never treat a
# same-named file in the directory as "already up to date".
.PHONY: all clean run

all: $(EXE)

# -fopenmp enables the OpenMP pragmas and links the OpenMP runtime.
$(EXE): $(SRC)
	$(CC) -fopenmp -o $(EXE) $(SRC)

clean:
	rm -f $(EXE)

# Depends on the executable so that `make run` (re)builds it first.
run: $(EXE)
	$(EXE)
# OpenMP
![OpenMP Fork-join Model](image1.png)
# CUDA
* [Google Colab notebook](https://colab.research.google.com/drive/1UP2TJvtJfgiU8x9g0iB8ffE5fj8B4aPV#scrollTo=tcS8TCGLz9m0)
* [CUDA Tutorial](https://cuda-tutorial.readthedocs.io/en/latest/tutorials/tutorial01/)
* [Machine Learning on GPU](https://hsf-training.github.io/hsf-training-ml-gpu-webpage/aio/index.html)
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
// Kernel: adds the first int stored at `b` into the first int stored at `a`.
//   a - in/out; element 0 is overwritten with a[0] + b[0]
//   b - input;  only element 0 is read
// No thread or block index is used, so every launched thread performs this
// same unsynchronized update on a[0].
__global__ void add(int *a, int *b) {
    const int addend = *b;
    *a = *a + addend;
}
// Reports a failed CUDA runtime call on stderr.
//   err  - status returned by the runtime call
//   what - short description of the step, inserted into the message
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Adds two integers on the GPU with the add() kernel and prints the sum.
// Returns EXIT_SUCCESS when every CUDA call succeeded, EXIT_FAILURE otherwise.
int main() {
    // host copies of variables a, b
    int a = 5;
    int b = 100;
    // device copies of variables a, b (NULL so cleanup is safe on early failure)
    int *d_a = NULL;
    int *d_b = NULL;
    const size_t size = sizeof(int);

    // Allocate space for device copies of a, b, then copy inputs to device.
    // && short-circuits, so the first failing step stops the sequence.
    bool ok = cudaOk(cudaMalloc(&d_a, size), "allocating d_a") &&
              cudaOk(cudaMalloc(&d_b, size), "allocating d_b") &&
              cudaOk(cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice),
                     "copying a to Device") &&
              cudaOk(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice),
                     "copying b to Device");

    if (ok) {
        // Launch add() kernel on GPU with a single thread
        add<<<1, 1>>>(d_a, d_b);
        // A launch returns no status itself; configuration errors are
        // reported by cudaGetLastError(). The cudaMemcpy that follows
        // reports any fault raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "launching add kernel") &&
             // Copy result back to host
             cudaOk(cudaMemcpy(&a, d_a, size, cudaMemcpyDeviceToHost),
                    "copying to Host");
    }

    // Only print a result that was actually produced by the device.
    if (ok) {
        printf("result is %d\n", a);
    }

    // Cleanup (cudaFree accepts a null pointer)
    cudaFree(d_a);
    cudaFree(d_b);
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
\ No newline at end of file
#include "omp.h"
#include "stdlib.h"
#include "stdio.h"
//simple hello world parallel method
// Starts a parallel region requesting 8 threads. Thread 0 prints the team
// size, every other thread prints its own id, and each thread then adds
// 1000 to the counter x.
void startParallelHeloworld()
{
    int x = 0;
    // reduction(+:x) gives every thread its own private x and sums the
    // private copies into the outer x when the region ends; a plain shared
    // x++ from several threads would be a data race.
    #pragma omp parallel num_threads(8) reduction(+:x)
    {
        int id = omp_get_thread_num();
        //printf("Number of thread = %d\n", id);
        if (id == 0)
            printf("Hello From Master: => Threads Count is: %d \n", omp_get_num_threads());
        else
            printf("Hello from Thread %d\n", id);
        for (int i = 0; i < 1000; i++) {
            x++;
        }
    }
    //printf("Done x=%d",x);
    (void)x;  // x is only consumed by the disabled print above
}
//SPMD
// Work-sharing loop example: computes A[i] = B[i] + k*C[i] for N elements
// under "#pragma omp parallel for" and prints, for every iteration, the
// iteration index and the id of the thread that executed it.
void forExample(){
    enum { N = 20 };   // compile-time constant, so the arrays are fixed-size
    const int k = 2;
    double A[N], B[N], C[N];
    int i;

    // Fill the inputs before the parallel loop reads them; reading
    // uninitialized automatic storage is undefined behavior.
    for (i = 0; i < N; i++) {
        B[i] = (double)i;
        C[i] = 1.0;
    }

    // The loop variable of a "parallel for" is private to each thread;
    // id is declared inside the body, so each iteration has its own.
    #pragma omp parallel for
    for (i = 0; i < N; i++) {
        int id = omp_get_thread_num();
        A[i] = B[i] + k * C[i];
        printf("\t %d Tid: %d\n", i, id);
    }
}
//Main method
// Program entry point: runs the hello-world parallel region, then the
// parallel-for example, printing a marker line after each.
//   count, arg - command-line arguments; not used
// Always returns 0.
int main(int count, char *arg[])
{
    (void)count;
    (void)arg;

    // omp_get_num_threads() reports the size of the *current* team, which is
    // always 1 out here in the sequential part. omp_get_max_threads() is the
    // call that reports how many threads a parallel region may use.
    int nthreads = omp_get_max_threads();
    //printf("Main Number of threads = %d\n", nthreads);
    (void)nthreads;  // only consumed by the disabled print above

    startParallelHeloworld();
    printf("level1\n");
    forExample();
    printf("level2\n");
    //sleep(1);
    return 0;
}
File added
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment