import org.apache.zookeeper.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class Autohealer implements Watcher {
    private static final Logger logger = LoggerFactory.getLogger(Autohealer.class);

    private static final String ZK = "127.0.0.1:2181";
    private static final int TIMEOUT = 3000;

    private static final String NODES = "/nodes";
    private static final String WORKERS = "/workers";

    private final int desiredWorkers;
    private final String workerJar;
    private ZooKeeper zk;

    public Autohealer(int desiredWorkers, String workerJar) {
        this.desiredWorkers = desiredWorkers;
        this.workerJar = workerJar;
    }

    public void connect() throws Exception {
        zk = new ZooKeeper(ZK, TIMEOUT, this);
    }

    public void bootstrap() throws Exception {
        ensure(NODES);
        ensure(WORKERS);

        rebalance();
    }

    private void ensure(String path) throws Exception {
        if (zk.exists(path, false) == null) {
            zk.create(path, new byte[]{}, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        }
    }

    public void run() throws InterruptedException {
        synchronized (zk) {
            zk.wait();
        }
    }

    @Override
    public void process(WatchedEvent e) {
        try {
            rebalance();
        } catch (Exception ex) {
            logger.error("Rebalance failed", ex);
        }
    }

    private void rebalance() throws Exception {
        List<String> nodes = zk.getChildren(NODES, this);
        List<String> workers = zk.getChildren(WORKERS, this);

        if (nodes.isEmpty()) return;

        Map<String, List<String>> byNode = new HashMap<>();
        for (String n : nodes) byNode.put(n, new ArrayList<>());

        for (String w : workers) {
            byte[] data = zk.getData(WORKERS + "/" + w, false, null);
            String node = new String(data);
            byNode.computeIfAbsent(node, k -> new ArrayList<>()).add(w);
        }

        // worker failure → restart on same node
        while (workers.size() < desiredWorkers) {
            String node = leastLoaded(byNode);
            startWorker(node);
            workers.add("new");
        }

        // node failure → redistribute
        for (String deadNode : new HashSet<>(byNode.keySet())) {
            if (!nodes.contains(deadNode)) {
                for (String w : byNode.get(deadNode)) {
                    String target = leastLoaded(byNode);
                    startWorker(target);
                }
                byNode.remove(deadNode);
            }
        }
    }

    private String leastLoaded(Map<String, List<String>> map) {
        return map.entrySet()
                .stream()
                .min(Comparator.comparingInt(e -> e.getValue().size()))
                .get().getKey();
    }

    private void startWorker(String node) throws Exception {
        File jar = new File(workerJar);

        ProcessBuilder pb = new ProcessBuilder(
                "java", "-jar", jar.getAbsolutePath()
        );
        pb.environment().put("NODE_ID", node);
        pb.start();

        logger.info("Started worker on {}", node);
    }
}
