#include <algorithm>
#include <chrono>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

using namespace std;
// Side length of the square matrices a, b and c.
const int DIM = 1000;
// Number of row bands the parallel kernels split the matrix into; also the
// number of worker threads main() launches for the multi-threaded runs.
const int THREADS = 4;

// Input operands of the multiplication; filled by init().
vector<vector<long>> a(DIM, vector<long>(DIM));
vector<vector<long>> b(DIM, vector<long>(DIM));
// Result matrix, written by seq(), para_nosync() and para_sync().
vector<vector<long>> c(DIM, vector<long>(DIM));

// Locked by para_sync() around every store into c.
mutex mtx;

// Populates the global matrices with a deterministic pattern:
//   a[r][k] = r + k,  b[r][k] = r - k,  c[r][k] = 0.
void init() {
    for (int row = 0; row < DIM; ++row) {
        auto& a_row = a[row];
        auto& b_row = b[row];
        auto& c_row = c[row];
        for (int col = 0; col < DIM; ++col) {
            a_row[col] = row + col;
            b_row[col] = row - col;
            c_row[col] = 0;
        }
    }
}

// Single-threaded reference kernel: computes c = a * b over all DIM rows
// using the plain row-by-column dot product.
void seq() {
    for (int row = 0; row < DIM; ++row) {
        const auto& lhs_row = a[row];
        auto& out_row = c[row];
        for (int col = 0; col < DIM; ++col) {
            long dot = 0;
            for (int k = 0; k < DIM; ++k) {
                dot += lhs_row[k] * b[k][col];
            }
            out_row[col] = dot;
        }
    }
}

// Worker kernel without locking: computes the rows of c = a * b that belong
// to band `id`. Each band is DIM / THREADS rows; the last band
// (id == THREADS - 1) extends to DIM so leftover rows are not dropped.
void para_nosync(int id) {
    const int band = DIM / THREADS;
    const int first_row = id * band;
    int last_row = first_row + band;
    if (id == THREADS - 1) {
        last_row = DIM;
    }

    for (int row = first_row; row < last_row; ++row) {
        for (int col = 0; col < DIM; ++col) {
            long dot = 0;
            for (int k = 0; k < DIM; ++k) {
                dot += a[row][k] * b[k][col];
            }
            c[row][col] = dot;
        }
    }
}

// Worker kernel with locking: same row band as para_nosync(id), but every
// store into c is performed while holding the global mutex mtx. The dot
// product itself is computed outside the lock.
void para_sync(int id) {
    const int band = DIM / THREADS;
    const int first_row = id * band;
    int last_row = first_row + band;
    if (id == THREADS - 1) {
        last_row = DIM;
    }

    for (int row = first_row; row < last_row; ++row) {
        for (int col = 0; col < DIM; ++col) {
            long dot = 0;
            for (int k = 0; k < DIM; ++k) {
                dot += a[row][k] * b[k][col];
            }
            {
                std::lock_guard<std::mutex> guard(mtx);
                c[row][col] = dot;
            }
        }
    }
}

// Zeroes every element of the result matrix c so that each benchmark run
// starts from the same state.
// std::fill is declared in <algorithm>; the fill value is spelled 0L to
// match the long element type.
void reset() {
    for (auto& row : c)
        std::fill(row.begin(), row.end(), 0L);
}

// Runs `func` once and returns the elapsed wall-clock time in seconds.
//
// @param func  Function to time; must be a valid (non-null) function pointer.
//              Captureless lambdas convert to this type implicitly.
// @return      Elapsed time of the single call, in seconds.
//
// Uses steady_clock rather than high_resolution_clock: the latter may be an
// alias for system_clock, which can jump when the system time is adjusted
// and would then produce wrong (even negative) intervals.
double time_func(void (*func)()) {
    const auto start = std::chrono::steady_clock::now();
    func();
    const auto end = std::chrono::steady_clock::now();
    return std::chrono::duration<double>(end - start).count();
}

int main() {
    init();
    
    reset(); double t1 = time_func(seq); cout << "Seq: " << t1 << "s\n";
    
    reset(); double t2 = time_func([](){
        thread(para_nosync,0).join();
    }); cout << "1 thread: " << t2 << "s\n";
    
    reset(); double t3 = time_func([](){
        thread t[THREADS];
        for(int i=0;i<THREADS;i++) t[i]=thread(para_nosync,i);
        for(int i=0;i<THREADS;i++) t[i].join();
    }); cout << THREADS << " threads: " << t3 << "s\n";
    
    reset(); double t4 = time_func([](){
        thread t[THREADS];
        for(int i=0;i<THREADS;i++) t[i]=thread(para_sync,i);
        for(int i=0;i<THREADS;i++) t[i].join();
    }); cout << THREADS << " threads sync: " << t4 << "s\n";
    
    return 0;
}