Commit 2dad8d38 authored by mohamad.alturky's avatar mohamad.alturky

Refactoring: add search engine and document representers

parent bba89b70
...@@ -2,10 +2,10 @@ package com.search.lucene; ...@@ -2,10 +2,10 @@ package com.search.lucene;
import java.io.IOException; import java.io.IOException;
import com.search.lucene.file.filters.TextFileFilter; import com.search.lucene.file.filters.implementations.TextFileFilter;
import com.search.lucene.indexers.TextFileIndexer; import com.search.lucene.indexers.implementations.TextFileIndexer;
import com.search.lucene.searchers.TextFileSearcher; import com.search.lucene.searchers.implementations.TextFileSearcher;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
...@@ -31,10 +31,10 @@ public class Lucene { ...@@ -31,10 +31,10 @@ public class Lucene {
} }
private static void createIndex() throws IOException { private static void createIndex() throws IOException {
indexer = new TextFileIndexer(indexDir); indexer = new TextFileIndexer(indexDir, new TextFileFilter());
int numIndexed; int numIndexed;
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
numIndexed = indexer.createIndex(dataDir, new TextFileFilter()); numIndexed = indexer.createIndex(dataDir);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
indexer.close(); indexer.close();
System.out.println(numIndexed+" File indexed, time taken: " System.out.println(numIndexed+" File indexed, time taken: "
...@@ -52,7 +52,7 @@ public class Lucene { ...@@ -52,7 +52,7 @@ public class Lucene {
for(ScoreDoc scoreDoc : hits.scoreDocs) { for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = textFileSearcher.getDocument(scoreDoc); Document doc = textFileSearcher.getDocument(scoreDoc);
System.out.println("File: " System.out.println("File: "
+ doc.get(LuceneConstants.FILE_PATH)); + doc.get(Constants.FILE_PATH));
} }
} }
} }
package com.search.lucene.documents.representers.abstractions;
import org.apache.lucene.document.Document;
/**
 * Turns a Lucene {@link Document} into a representation of type {@code T}.
 *
 * @param <T> the type of the representation produced by an implementation
 */
public interface IDocumentRepresenter<T> {

    /**
     * Produces the representation of the given document.
     *
     * @param indexedDocument the Lucene document to represent
     * @return the representation of {@code indexedDocument}; the contract for
     *         nullability is not defined by this interface
     */
    T Represent(Document indexedDocument);
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} producing a {@code String} for CSV-derived documents.
 *
 * <p>Placeholder: the representation is not implemented yet.
 */
public class CSVDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the Lucene document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        // TODO: build the actual string representation; callers must expect null for now.
        return null;
    }
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} producing a {@code String} for PDF-derived documents.
 *
 * <p>Placeholder: the representation is not implemented yet.
 */
public class PDFDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the Lucene document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        // TODO: build the actual string representation; callers must expect null for now.
        return null;
    }
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} producing a {@code String} for text-file-derived documents.
 *
 * <p>Placeholder: the representation is not implemented yet.
 */
public class TextDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the Lucene document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        // TODO: build the actual string representation; callers must expect null for now.
        return null;
    }
}
package com.search.lucene.engine.abstractions;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Entry point for building an index over a directory and querying it.
 */
public interface ISearchEngine {

    /**
     * Creates the index for the files found under the given directory.
     *
     * @param directoryPath path of the directory whose files are to be indexed
     * @throws IOException if indexing fails because of an I/O error
     */
    void createIndexForDirectory(String directoryPath) throws IOException;

    /**
     * Runs a query and returns the matching Lucene documents.
     *
     * @param searchQuery the query text
     * @return the documents that matched the query
     * @throws IOException    if the index cannot be read
     * @throws ParseException if {@code searchQuery} cannot be parsed
     */
    ArrayList<Document> search(String searchQuery) throws IOException, ParseException;
}
package com.search.lucene.engine.implementations;
import com.search.lucene.engine.abstractions.ISearchEngine;
import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.searchers.abstractions.ISearcher;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import java.io.IOException;
import java.util.ArrayList;
/**
 * {@link ISearchEngine} that delegates indexing to a list of {@link IFileIndexer}s
 * and searching to a list of {@link ISearcher}s.
 */
public class LuceneEngine implements ISearchEngine {
    private final ArrayList<IFileIndexer> indexers;
    private final ArrayList<ISearcher> searchers;

    /**
     * Creates an engine backed by the given indexers and searchers.
     *
     * @param indexers  indexers invoked, in order, by {@link #createIndexForDirectory(String)}
     * @param searchers searchers queried, in order, by {@link #search(String)}
     */
    public LuceneEngine(ArrayList<IFileIndexer> indexers, ArrayList<ISearcher> searchers) {
        this.indexers = indexers;
        this.searchers = searchers;
    }

    /**
     * Runs every configured indexer over the given directory.
     *
     * @param directoryPath path of the directory whose files are to be indexed
     * @throws IOException if any indexer fails; indexers after the failing one are not run
     */
    @Override
    public void createIndexForDirectory(String directoryPath) throws IOException {
        for (var indexer : indexers) {
            indexer.createIndex(directoryPath);
        }
    }

    /**
     * Runs the query against every configured searcher and collects the matching documents.
     *
     * <p>Results are concatenated in searcher order, each searcher's hits in the order it
     * returned them. No de-duplication or cross-searcher re-ranking is performed.
     *
     * @param searchQuery the query text handed to each searcher
     * @return the matching documents; empty (never {@code null}) when nothing matches
     * @throws IOException    if a searcher cannot read its index
     * @throws ParseException if a searcher cannot parse {@code searchQuery}
     */
    @Override
    public ArrayList<Document> search(String searchQuery) throws IOException, ParseException {
        ArrayList<Document> results = new ArrayList<>();
        for (var searcher : searchers) {
            var hits = searcher.search(searchQuery);
            // Resolve each hit to its stored document through the searcher that produced it,
            // since document ids are only meaningful for that searcher's index.
            for (var hit : hits.scoreDocs) {
                results.add(searcher.getDocument(hit));
            }
        }
        return results;
    }
}
package com.search.lucene.file.filters; package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter; import com.search.lucene.file.filters.abstractions.IFileFilter;
......
package com.search.lucene.file.filters; package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter; import com.search.lucene.file.filters.abstractions.IFileFilter;
......
package com.search.lucene.file.filters; package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter; import com.search.lucene.file.filters.abstractions.IFileFilter;
......
...@@ -4,5 +4,5 @@ import java.io.FileFilter; ...@@ -4,5 +4,5 @@ import java.io.FileFilter;
import java.io.IOException; import java.io.IOException;
public interface IFileIndexer { public interface IFileIndexer {
int createIndex(String dataDirectoryPath, FileFilter filter) throws IOException; int createIndex(String dataDirectoryPath) throws IOException;
} }
package com.search.lucene.indexers; package com.search.lucene.indexers.implementations;
import com.opencsv.CSVReader; import com.opencsv.CSVReader;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer; import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
...@@ -23,8 +24,10 @@ import java.util.ArrayList; ...@@ -23,8 +24,10 @@ import java.util.ArrayList;
public class CSVFileIndexer implements IFileIndexer { public class CSVFileIndexer implements IFileIndexer {
private final IndexWriter writer; private final IndexWriter writer;
private final IFileFilter filter;
public CSVFileIndexer(String indexDirectoryPath) throws IOException { public CSVFileIndexer(String indexDirectoryPath, IFileFilter filter) throws IOException {
this.filter = filter;
//this directory will contain the indexes //this directory will contain the indexes
Directory indexDirectory = Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
...@@ -53,11 +56,11 @@ public class CSVFileIndexer implements IFileIndexer { ...@@ -53,11 +56,11 @@ public class CSVFileIndexer implements IFileIndexer {
for (int i = 0; i < nextRecord.length; i++) { for (int i = 0; i < nextRecord.length; i++) {
Document document = new Document(); Document document = new Document();
document.add(new TextField(LuceneConstants.COLUMN, columns[i], Field.Store.YES)); document.add(new TextField(Constants.COLUMN, columns[i], Field.Store.YES));
document.add(new TextField(LuceneConstants.VALUE, nextRecord[i], Field.Store.YES)); document.add(new TextField(Constants.VALUE, nextRecord[i], Field.Store.YES));
document.add(new TextField(LuceneConstants.LINE, Integer.toString(line), Field.Store.YES)); document.add(new TextField(Constants.ROW, Integer.toString(line), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES)); document.add(new StringField(Constants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_NAME, file.getName(), Field.Store.YES)); document.add(new StringField(Constants.FILE_NAME, file.getName(), Field.Store.YES));
documents.add(document); documents.add(document);
System.out.print(nextRecord[i] + "\t"); System.out.print(nextRecord[i] + "\t");
} }
...@@ -76,7 +79,7 @@ public class CSVFileIndexer implements IFileIndexer { ...@@ -76,7 +79,7 @@ public class CSVFileIndexer implements IFileIndexer {
} }
@Override @Override
public int createIndex(String dataDirectoryPath, FileFilter filter) public int createIndex(String dataDirectoryPath)
throws IOException { throws IOException {
File[] files = new File(dataDirectoryPath).listFiles(); File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.indexers; package com.search.lucene.indexers.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer; import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
...@@ -17,18 +18,18 @@ import org.apache.pdfbox.text.PDFTextStripper; ...@@ -17,18 +18,18 @@ import org.apache.pdfbox.text.PDFTextStripper;
import java.io.File; import java.io.File;
import java.io.FileFilter; import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
public class PDFFileIndexer implements IFileIndexer { public class PDFFileIndexer implements IFileIndexer {
private final IndexWriter writer; private final IndexWriter writer;
private final IFileFilter filter;
public PDFFileIndexer(String indexDirectoryPath) throws IOException { public PDFFileIndexer(String indexDirectoryPath, IFileFilter fileFilter) throws IOException {
//this directory will contain the indexes //this directory will contain the indexes
Directory indexDirectory = Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
this.filter = fileFilter;
//create the indexer //create the indexer
writer = new IndexWriter(indexDirectory, new IndexWriterConfig(new StandardAnalyzer())); writer = new IndexWriter(indexDirectory, new IndexWriterConfig(new StandardAnalyzer()));
} }
...@@ -42,9 +43,9 @@ public class PDFFileIndexer implements IFileIndexer { ...@@ -42,9 +43,9 @@ public class PDFFileIndexer implements IFileIndexer {
PDDocument pdDocument = PDDocument.load(file); PDDocument pdDocument = PDDocument.load(file);
String content = new PDFTextStripper().getText(pdDocument); String content = new PDFTextStripper().getText(pdDocument);
Document document = new Document(); Document document = new Document();
document.add(new TextField(LuceneConstants.CONTENTS, content, Field.Store.YES)); document.add(new TextField(Constants.CONTENTS, content, Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES)); document.add(new StringField(Constants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_NAME, file.getName(), Field.Store.YES)); document.add(new StringField(Constants.FILE_NAME, file.getName(), Field.Store.YES));
writer.addDocument(document); writer.addDocument(document);
pdDocument.close(); pdDocument.close();
...@@ -58,7 +59,7 @@ public class PDFFileIndexer implements IFileIndexer { ...@@ -58,7 +59,7 @@ public class PDFFileIndexer implements IFileIndexer {
} }
@Override @Override
public int createIndex(String dataDirectoryPath, FileFilter filter) public int createIndex(String dataDirectoryPath)
throws IOException { throws IOException {
File[] files = new File(dataDirectoryPath).listFiles(); File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.indexers; package com.search.lucene.indexers.implementations;
import java.io.File; import java.io.File;
import java.io.FileFilter; import java.io.FileFilter;
...@@ -6,8 +6,9 @@ import java.io.FileReader; ...@@ -6,8 +6,9 @@ import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer; import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
...@@ -19,8 +20,10 @@ import org.apache.lucene.store.FSDirectory; ...@@ -19,8 +20,10 @@ import org.apache.lucene.store.FSDirectory;
public class TextFileIndexer implements IFileIndexer { public class TextFileIndexer implements IFileIndexer {
private final IndexWriter writer; private final IndexWriter writer;
private final IFileFilter filter;
public TextFileIndexer(String indexDirectoryPath) throws IOException { public TextFileIndexer(String indexDirectoryPath, IFileFilter filter) throws IOException {
this.filter = filter;
//this directory will contain the indexes //this directory will contain the indexes
Directory indexDirectory = Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
...@@ -36,12 +39,12 @@ public class TextFileIndexer implements IFileIndexer { ...@@ -36,12 +39,12 @@ public class TextFileIndexer implements IFileIndexer {
private Document getDocument(File file) throws IOException { private Document getDocument(File file) throws IOException {
Document document = new Document(); Document document = new Document();
TextField contentField = new TextField(LuceneConstants.CONTENTS, new FileReader(file)); TextField contentField = new TextField(Constants.CONTENTS, new FileReader(file));
TextField fileNameField = new TextField(LuceneConstants.FILE_NAME, TextField fileNameField = new TextField(Constants.FILE_NAME,
file.getName(),TextField.Store.YES); file.getName(),TextField.Store.YES);
TextField filePathField = new TextField(LuceneConstants.FILE_PATH, TextField filePathField = new TextField(Constants.FILE_PATH,
file.getCanonicalPath(),TextField.Store.YES); file.getCanonicalPath(),TextField.Store.YES);
document.add(contentField); document.add(contentField);
...@@ -58,7 +61,7 @@ public class TextFileIndexer implements IFileIndexer { ...@@ -58,7 +61,7 @@ public class TextFileIndexer implements IFileIndexer {
} }
@Override @Override
public int createIndex(String dataDirectoryPath, FileFilter filter) public int createIndex(String dataDirectoryPath)
throws IOException { throws IOException {
File[] files = new File(dataDirectoryPath).listFiles(); File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.searchers; package com.search.lucene.searchers.implementations;
import com.search.lucene.searchers.abstractions.ISearcher; import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
...@@ -31,7 +31,7 @@ public class CSVFileSearcher implements ISearcher { ...@@ -31,7 +31,7 @@ public class CSVFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory); IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader); indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS, queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer()); new StandardAnalyzer());
} }
...@@ -39,7 +39,7 @@ public class CSVFileSearcher implements ISearcher { ...@@ -39,7 +39,7 @@ public class CSVFileSearcher implements ISearcher {
public TopDocs search( String searchQuery) public TopDocs search( String searchQuery)
throws IOException, ParseException { throws IOException, ParseException {
query = queryParser.parse(searchQuery); query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH); return indexSearcher.search(query, Constants.MAX_SEARCH);
} }
@Override @Override
public Document getDocument(ScoreDoc scoreDoc) public Document getDocument(ScoreDoc scoreDoc)
......
package com.search.lucene.searchers; package com.search.lucene.searchers.implementations;
import com.search.lucene.searchers.abstractions.ISearcher; import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
...@@ -31,15 +31,15 @@ public class PDFFileSearcher implements ISearcher { ...@@ -31,15 +31,15 @@ public class PDFFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory); IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader); indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS, queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer()); new StandardAnalyzer());
} }
@Override @Override
public TopDocs search( String searchQuery) public TopDocs search(String searchQuery)
throws IOException, ParseException { throws IOException, ParseException {
query = queryParser.parse(searchQuery); query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH); return indexSearcher.search(query, Constants.MAX_SEARCH);
} }
@Override @Override
public Document getDocument(ScoreDoc scoreDoc) public Document getDocument(ScoreDoc scoreDoc)
......
package com.search.lucene.searchers; package com.search.lucene.searchers.implementations;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
import com.search.lucene.searchers.abstractions.ISearcher; import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants; import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
...@@ -31,20 +31,19 @@ public class TextFileSearcher implements ISearcher { ...@@ -31,20 +31,19 @@ public class TextFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath)); FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory); IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader); indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS, queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer()); new StandardAnalyzer());
} }
@Override @Override
public TopDocs search( String searchQuery) public TopDocs search(String searchQuery)
throws IOException, ParseException { throws IOException, ParseException {
query = queryParser.parse(searchQuery); query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH); return indexSearcher.search(query, Constants.MAX_SEARCH);
} }
@Override @Override
public Document getDocument(ScoreDoc scoreDoc) public Document getDocument(ScoreDoc scoreDoc)
throws CorruptIndexException, IOException { throws CorruptIndexException, IOException {
return indexSearcher.doc(scoreDoc.doc); return indexSearcher.doc(scoreDoc.doc);
} }
} }
\ No newline at end of file
package com.search.lucene.settings; package com.search.lucene.settings;
public class LuceneConstants { public class Constants {
public static final String CONTENTS = "contents"; public static final String CONTENTS = "contents";
public static final String INDEXED_FROM_FILE_FORMAT = "from_format";
public static final String FILE_NAME = "filename"; public static final String FILE_NAME = "filename";
public static final String FILE_PATH = "filepath"; public static final String FILE_PATH = "filepath";
public static final String COLUMN = "column"; public static final String COLUMN = "column";
......
package com.search.lucene.settings;
/**
 * Kinds of index, distinguished by source file format.
 *
 * <p>Constant order is part of the type's observable behaviour ({@code ordinal()},
 * {@code values()}); do not reorder.
 */
public enum IndexType {
    /** Comma-separated values files. */
    CSV,

    /** PDF files. */
    PDF,

    /** Plain text files. */
    TEXT
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment