package com.distributed.search.logic;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Static helpers for computing TF-IDF components: tokenizing a document into
 * words, the frequency of a term within a word list, and the inverse document
 * frequency of a term across a corpus.
 */
public class TFIDFCalculator {

    /**
     * One or more consecutive delimiters: period, whitespace, comma or question mark.
     * Compiled once so the pattern is not re-parsed on every call. The previous
     * pattern also contained a bare {@code \k}, which is not a valid regex escape
     * (it must be followed by {@code <name>}) and made every split throw
     * {@link java.util.regex.PatternSyntaxException}; that alternative is dropped.
     */
    private static final Pattern WORD_DELIMITERS = Pattern.compile("[.\\s,?]+");

    /**
     * Splits a document into lower-cased words.
     *
     * @param content the raw document text; must not be {@code null}
     * @return a mutable list of the non-empty tokens in document order; empty when
     *         the content has no word characters
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static List<String> getWordsFromDocument(String content) {
        // Locale.ROOT keeps case folding independent of the JVM's default locale.
        String[] tokens = WORD_DELIMITERS.split(content.toLowerCase(Locale.ROOT));
        List<String> words = new ArrayList<>(tokens.length);
        for (String token : tokens) {
            // split() still yields "" for empty input or a leading delimiter; such
            // tokens are not words and would skew the term-frequency denominator.
            if (!token.isEmpty()) {
                words.add(token);
            }
        }
        return words;
    }

    /**
     * Computes the fraction of {@code words} that equal {@code term}, ignoring case.
     *
     * @param words the document's words; must not be {@code null}
     * @param term  the term to count; a {@code null} term matches nothing
     * @return occurrences of the term divided by the number of words, or
     *         {@code 0.0} when {@code words} is empty
     */
    public static double calculateTermFrequency(List<String> words, String term) {
        if (words.isEmpty()) {
            // Avoid 0/0, which would otherwise produce NaN.
            return 0.0;
        }
        long count = words.stream()
                .filter(word -> word.equalsIgnoreCase(term))
                .count();
        return (double) count / words.size();
    }

    /**
     * Computes the inverse document frequency as
     * {@code log10(totalDocuments / documentsWithTerm)}.
     *
     * @param totalDocuments    number of documents in the corpus
     * @param documentsWithTerm number of documents containing the term
     * @return the base-10 IDF, or {@code 0} when either count is not positive
     *         (the ratio would be undefined or its logarithm non-finite)
     */
    public static double calculateIdf(int totalDocuments, int documentsWithTerm) {
        if (documentsWithTerm <= 0 || totalDocuments <= 0) {
            return 0;
        }
        return Math.log10((double) totalDocuments / documentsWithTerm);
    }
}