Commit a8b39061 authored by tammam.alsoleman's avatar tammam.alsoleman

create logic file (TF-IDF)

parent b5da3f26
package com.distributed.search.logic;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public class FileManager { public class FileManager {
}
public static List<String> getSortedDocumentNames(String directoryPath) {
File directory = new File(directoryPath);
File[] files = directory.listFiles((dir, name) -> name.endsWith(".txt"));
if (files == null) return List.of();
return Arrays.stream(files)
.map(File::getName)
.sorted()
.collect(Collectors.toList());
}
public static String readDocumentContent(String directoryPath, String fileName) throws IOException {
return Files.readString(Paths.get(directoryPath, fileName));
}
}
\ No newline at end of file
package com.distributed.search.logic;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
 * Static helpers for the TF-IDF (term frequency / inverse document frequency) scoring of
 * plain-text documents.
 */
public final class TFIDFCalculator {

    /**
     * One or more consecutive delimiters: whitespace, period, comma or question mark. Matching a
     * run (rather than a single character) keeps sequences such as ", " from producing empty
     * tokens.
     */
    private static final Pattern WORD_DELIMITERS = Pattern.compile("[\\s.,?]+");

    private TFIDFCalculator() {
        // Utility class: not meant to be instantiated.
    }

    /**
     * Splits a document into lower-cased words.
     *
     * @param content raw document text; {@code null} is treated as an empty document
     * @return the words in document order, without empty tokens; never {@code null}
     */
    public static List<String> getWordsFromDocument(String content) {
        List<String> words = new ArrayList<>();
        if (content == null) {
            return words;
        }
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        for (String token : WORD_DELIMITERS.split(content.toLowerCase(Locale.ROOT))) {
            // A leading delimiter still yields one empty first token; drop it.
            if (!token.isEmpty()) {
                words.add(token);
            }
        }
        return words;
    }

    /**
     * Computes the term frequency: occurrences of {@code term} (compared case-insensitively)
     * divided by the total number of words.
     *
     * @param words the words of one document
     * @param term the term to count
     * @return the frequency in the range [0, 1]; {@code 0} when {@code words} is {@code null} or
     *     empty (instead of the NaN a 0/0 division would give)
     */
    public static double calculateTermFrequency(List<String> words, String term) {
        if (words == null || words.isEmpty()) {
            return 0.0;
        }
        long occurrences = words.stream()
                .filter(word -> word.equalsIgnoreCase(term))
                .count();
        return (double) occurrences / words.size();
    }

    /**
     * Computes the inverse document frequency
     * {@code log10(totalDocuments / documentsWithTerm)}.
     *
     * @param totalDocuments number of documents in the corpus
     * @param documentsWithTerm number of documents that contain the term
     * @return the IDF value; {@code 0} when either count is not positive, so that callers never
     *     receive an infinite or NaN score
     */
    public static double calculateIdf(int totalDocuments, int documentsWithTerm) {
        if (totalDocuments <= 0 || documentsWithTerm <= 0) {
            return 0.0;
        }
        return Math.log10((double) totalDocuments / documentsWithTerm);
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment