Commit 32c043cd authored by mohamad.alturky's avatar mohamad.alturky

bug fixing

parent 337e7fee
#/csvIndex
#/pdfIndex
#/textIndex
#/textData
#/csvData
#/pdfData
#/data
/target
\ No newline at end of file
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,25-Sep-21,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable."
\ No newline at end of file
s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng",South Africa,24-Sep-21,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth."
\ No newline at end of file
s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera",,24-Sep-21,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Action & Adventure","To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war."
\ No newline at end of file
s4,TV Show,Jailbirds New Orleans,,,,24-Sep-21,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series."
\ No newline at end of file
s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar",India,24-Sep-21,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV Comedies","In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life."
\ No newline at end of file
s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver",,24-Sep-21,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries","The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe."
\ No newline at end of file
s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, Sofia Carson, Liza Koshy, Ken Jeong, Elizabeth Perkins, Jane Krakowski, Michael McKean, Phil LaMarr",,24-Sep-21,2021,PG,91 min,Children & Family Movies,"Equestria's divided. But a bright-eyed hero believes Earth Ponies, Pegasi and Unicorns should be pals — and, hoof to heart, she’s determined to prove it."
\ No newline at end of file
s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra Duah, Nick Medley, Mutabaruka, Afemo Omilami, Reggie Carter, Mzuri","United States, Ghana, Burkina Faso, United Kingdom, Germany, Ethiopia",24-Sep-21,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model slips back in time, becomes enslaved on a plantation and bears witness to the agony of her ancestral past."
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
package com.search.lucene;
import java.io.IOException;
import java.util.ArrayList;
import com.search.lucene.documents.representers.resolver.DocumentToStringRepresenterResolver;
import com.search.lucene.documents.representers.resolver.IDocumentRepresenterResolver;
import com.search.lucene.engine.abstractions.ISearchEngine;
import com.search.lucene.engine.builder.abstractions.IEngineBuilder;
import com.search.lucene.engine.builder.implementations.LuceneEngineBuilder;
import com.search.lucene.indexers.implementations.TextFileIndexer;
import com.search.lucene.searchers.implementations.IndexedDocumentSearcher;
import com.search.lucene.settings.Constants;
......@@ -12,46 +18,60 @@ import org.apache.lucene.search.TopDocs;
public class Lucene {
private static final String indexDir = "index";
private static final String dataDir = "data";
private static TextFileIndexer indexer;
private static IndexedDocumentSearcher textFileSearcher;
public static void main(String[] args) throws IOException, ParseException {
IEngineBuilder builder = new LuceneEngineBuilder();
ISearchEngine searchEngine = builder.build();
searchEngine.createIndexesForDirectory("data");
public static void main(String[] args) {
try {
createIndex();
search("Kota");
search("s");
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
ArrayList<Document> results = searchEngine.search("Julien Leclercq");
IDocumentRepresenterResolver<String> resolver = new DocumentToStringRepresenterResolver();
System.out.println();
for (var result : results) {
System.out.println(resolver.resolveRepresentation(result));
}
}
private static void createIndex() throws IOException {
indexer = new TextFileIndexer(indexDir);
int numIndexed;
long startTime = System.currentTimeMillis();
numIndexed = indexer.createIndex(dataDir);
long endTime = System.currentTimeMillis();
indexer.close();
System.out.println(numIndexed+" File indexed, time taken: "
+(endTime-startTime)+" ms");
}
private static void search(String searchQuery) throws IOException, ParseException {
textFileSearcher = new IndexedDocumentSearcher(indexDir,Constants.CONTENT);
long startTime = System.currentTimeMillis();
TopDocs hits = textFileSearcher.search(searchQuery);
long endTime = System.currentTimeMillis();
System.out.println(hits.totalHits +
" documents found. Time :" + (endTime - startTime));
for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = textFileSearcher.getDocument(scoreDoc);
System.out.println("File: "
+ doc.get(Constants.FILE_PATH));
}
}
// private static final String indexDir = "index";
// private static final String dataDir = "data";
// private static TextFileIndexer indexer;
// private static IndexedDocumentSearcher textFileSearcher;
//
// public static void main(String[] args) {
// try {
// createIndex();
// search("Kota");
// search("s");
// } catch (IOException e) {
// e.printStackTrace();
// } catch (ParseException e) {
// e.printStackTrace();
// }
// }
//
// private static void createIndex() throws IOException {
// indexer = new TextFileIndexer(indexDir);
// int numIndexed;
// long startTime = System.currentTimeMillis();
// numIndexed = indexer.createIndex(dataDir);
// long endTime = System.currentTimeMillis();
// indexer.close();
// System.out.println(numIndexed+" File indexed, time taken: "
// +(endTime-startTime)+" ms");
// }
//
// private static void search(String searchQuery) throws IOException, ParseException {
// textFileSearcher = new IndexedDocumentSearcher(indexDir,Constants.CONTENT);
// long startTime = System.currentTimeMillis();
// TopDocs hits = textFileSearcher.search(searchQuery);
// long endTime = System.currentTimeMillis();
//
// System.out.println(hits.totalHits +
// " documents found. Time :" + (endTime - startTime));
// for(ScoreDoc scoreDoc : hits.scoreDocs) {
// Document doc = textFileSearcher.getDocument(scoreDoc);
// System.out.println("File: "
// + doc.get(Constants.FILE_PATH));
// }
// }
}
......@@ -7,6 +7,6 @@ import java.io.IOException;
import java.util.ArrayList;
/**
 * Facade over indexing and querying: callers index a directory once and then
 * run free-text searches that return the matching Lucene documents.
 *
 * NOTE(review): this rendering lists both createIndexForDirectory and
 * createIndexesForDirectory. The visible LuceneEngine implementation and the
 * call in Lucene.main use the plural name, so the singular declaration is
 * presumably the line this commit removed -- confirm against the real file.
 */
public interface ISearchEngine {
// Singular-named variant; see the review note on the interface.
void createIndexForDirectory(String directoryPath) throws IOException;
/**
 * Builds the index(es) for the files under the given directory.
 *
 * @param directoryPath path of the directory whose files should be indexed
 * @throws IOException declared for failures while indexing
 */
void createIndexesForDirectory(String directoryPath) throws IOException;
/**
 * Runs a query and returns the matching documents.
 *
 * @param searchQuery query text to search for
 * @return the matching documents
 * @throws IOException declared for failures while reading an index
 * @throws ParseException declared for query text that cannot be parsed
 */
ArrayList<Document> search(String searchQuery) throws IOException, ParseException;
}
......@@ -21,8 +21,9 @@ public class LuceneEngine implements ISearchEngine {
}
@Override
public void createIndexForDirectory(String directoryPath) throws IOException {
public void createIndexesForDirectory(String directoryPath) throws IOException {
for (IFileIndexer indexer : indexers) {
System.out.println("Starting indexing " + indexer.getClass().getName());
indexer.createIndex(directoryPath);
indexer.close();
}
......@@ -31,7 +32,8 @@ public class LuceneEngine implements ISearchEngine {
@Override
public ArrayList<Document> search(String searchQuery) throws IOException, ParseException {
ArrayList<Document> documents = new ArrayList<>();
for (ISearcher searcher : searchers){
for (ISearcher searcher : searchers) {
searcher.init();
TopDocs hits = searcher.search(searchQuery);
for(ScoreDoc scoreDoc : hits.scoreDocs) {
......@@ -40,5 +42,4 @@ public class LuceneEngine implements ISearchEngine {
}
return documents;
}
}
......@@ -73,16 +73,14 @@ public class CSVFileIndexer implements IFileIndexer {
document.add(new StringField(Constants.FILE_NAME, file.getName(), Field.Store.YES));
documents.add(document);
System.out.print(nextRecord[i] + "\t");
}
System.out.println();
}
return documents;
}
private void indexFile(File file) throws IOException {
System.out.println("Indexing " + file.getCanonicalPath());
ArrayList<Document> documents = getDocument(file);
for (var document : documents) {
writer.addDocument(document);
......@@ -94,7 +92,9 @@ public class CSVFileIndexer implements IFileIndexer {
throws IOException {
File[] files = new File(dataDirectoryPath).listFiles();
assert files != null;
if (files == null){
return -1000;
}
for (File file : files) {
if (!file.isDirectory()
......
......@@ -55,7 +55,7 @@ public class PDFFileIndexer implements IFileIndexer {
}
/**
 * Indexes a single file: prints its canonical path, converts it to a Lucene
 * document with getDocument, and hands that document to the index writer.
 *
 * @param file the file to add to the index
 * @throws IOException if resolving the canonical path, building the document,
 *         or writing to the index fails
 */
private void indexFile(File file) throws IOException {
    final String canonicalPath = file.getCanonicalPath();
    System.out.println("Indexing " + canonicalPath);
    writer.addDocument(getDocument(file));
}
......@@ -63,6 +63,7 @@ public class PDFFileIndexer implements IFileIndexer {
@Override
public int createIndex(String dataDirectoryPath)
throws IOException {
File[] files = new File(dataDirectoryPath).listFiles();
assert files != null;
......
......@@ -58,7 +58,7 @@ public class TextFileIndexer implements IFileIndexer {
}
/**
 * Adds one file to the index. The file's canonical path is reported on
 * standard output first, then the document produced by getDocument is
 * written through the index writer.
 *
 * @param file the file to index
 * @throws IOException if the path cannot be resolved or the document cannot
 *         be built or written
 */
private void indexFile(File file) throws IOException {
    String resolvedPath = file.getCanonicalPath();
    System.out.println("Indexing " + resolvedPath);

    Document indexed = getDocument(file);
    writer.addDocument(indexed);
}
......
......@@ -12,4 +12,5 @@ import java.io.IOException;
/**
 * A searcher over one index: it is prepared with {@link #init()}, queried with
 * {@link #search(String)}, and hits are turned into documents with
 * {@link #getDocument(ScoreDoc)}.
 */
public interface ISearcher {
/**
 * Loads the stored document that a search hit refers to.
 *
 * @param scoreDoc a hit taken from the result of {@link #search(String)}
 * @return the document for that hit
 * @throws IOException declared for failures while reading the index
 */
Document getDocument(ScoreDoc scoreDoc) throws IOException;
/**
 * Runs the given query text against the index.
 *
 * @param searchQuery query text to parse and execute
 * @return the top hits for the query
 * @throws IOException declared for failures while reading the index
 * @throws ParseException declared for query text that cannot be parsed
 */
TopDocs search(String searchQuery) throws IOException, ParseException;
/**
 * Prepares the searcher for use. In the visible code LuceneEngine.search
 * calls this before each query, and IndexedDocumentSearcher uses it to open
 * the index reader and build the query parser.
 *
 * @throws IOException declared for failures while opening the index
 */
void init() throws IOException;
}
......@@ -23,25 +23,33 @@ import org.apache.lucene.store.FSDirectory;
public class IndexedDocumentSearcher implements ISearcher {
private final IndexSearcher indexSearcher;
private final QueryParser queryParser;
private IndexSearcher indexSearcher;
private QueryParser queryParser;
private final String indexDirectoryPath;
private final String queryForField;
public IndexedDocumentSearcher(String indexDirectoryPath, String queryForField)
throws IOException {
Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(queryForField,
new StandardAnalyzer());
this.indexDirectoryPath = indexDirectoryPath;
this.queryForField = queryForField;
}
/**
 * Parses the query text and returns up to {@code Constants.MAX_SEARCH} hits.
 *
 * <p>The index searcher and query parser are created by {@link #init()}, not
 * by the constructor. If the caller has not initialized this searcher yet, it
 * is initialized here instead of failing with a NullPointerException.
 *
 * @param searchQuery query text to parse and execute
 * @return the top hits for the query
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if the query text cannot be parsed
 */
@Override
public TopDocs search(String searchQuery)
        throws IOException, ParseException {
    // Lazy fallback for callers that construct the searcher and query it
    // directly without going through init() first.
    if (indexSearcher == null || queryParser == null) {
        init();
    }
    Query query = queryParser.parse(searchQuery);
    return indexSearcher.search(query, Constants.MAX_SEARCH);
}
/**
 * Opens the index at {@code indexDirectoryPath} and (re)creates the index
 * searcher and the query parser for {@code queryForField}.
 *
 * <p>This method may be called repeatedly (the visible LuceneEngine.search
 * calls it before each query). The reader opened by an earlier call is closed
 * once the new searcher is in place, so repeated calls do not accumulate open
 * index readers.
 *
 * @throws IOException if the index directory or reader cannot be opened, or
 *         if closing the previously opened reader fails
 */
@Override
public void init() throws IOException {
    // Keep a handle on the old searcher; it is only released after the new
    // one has been opened successfully, so a failed re-open leaves the
    // existing state usable.
    IndexSearcher previous = indexSearcher;

    Directory indexDirectory = FSDirectory.open(Paths.get(indexDirectoryPath));
    IndexReader reader = DirectoryReader.open(indexDirectory);
    indexSearcher = new IndexSearcher(reader);
    queryParser = new QueryParser(queryForField, new StandardAnalyzer());

    if (previous != null) {
        previous.getIndexReader().close();
    }
}
@Override
public Document getDocument(ScoreDoc scoreDoc)
throws CorruptIndexException, IOException {
......
File added
File added
File added
File added
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment