Commit 2dad8d38 authored by mohamad.alturky

Refactoring: add the search engine and document representers

parent bba89b70
......@@ -2,10 +2,10 @@ package com.search.lucene;
import java.io.IOException;
import com.search.lucene.file.filters.TextFileFilter;
import com.search.lucene.indexers.TextFileIndexer;
import com.search.lucene.searchers.TextFileSearcher;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.file.filters.implementations.TextFileFilter;
import com.search.lucene.indexers.implementations.TextFileIndexer;
import com.search.lucene.searchers.implementations.TextFileSearcher;
import com.search.lucene.settings.Constants;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.ScoreDoc;
......@@ -31,10 +31,10 @@ public class Lucene {
}
private static void createIndex() throws IOException {
indexer = new TextFileIndexer(indexDir);
indexer = new TextFileIndexer(indexDir, new TextFileFilter());
int numIndexed;
long startTime = System.currentTimeMillis();
numIndexed = indexer.createIndex(dataDir, new TextFileFilter());
numIndexed = indexer.createIndex(dataDir);
long endTime = System.currentTimeMillis();
indexer.close();
System.out.println(numIndexed+" File indexed, time taken: "
......@@ -52,7 +52,7 @@ public class Lucene {
for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = textFileSearcher.getDocument(scoreDoc);
System.out.println("File: "
+ doc.get(LuceneConstants.FILE_PATH));
+ doc.get(Constants.FILE_PATH));
}
}
}
package com.search.lucene.documents.representers.abstractions;
import org.apache.lucene.document.Document;
/**
 * Turns a Lucene {@link Document} into a representation of type {@code T}.
 *
 * @param <T> the type of the representation produced by the implementation
 */
public interface IDocumentRepresenter<T> {

    /**
     * Produces the representation of a single document.
     *
     * @param indexedDocument the document to represent
     * @return the representation of {@code indexedDocument}
     */
    T Represent(Document indexedDocument);
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} that yields a {@link String} for a document.
 *
 * <p>Currently a stub: {@link #Represent(Document)} always returns {@code null}.
 * NOTE(review): presumably meant for documents produced from CSV files, going by
 * the class name — confirm once the implementation is written.
 */
public class CSVDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        return null;
    }
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} that yields a {@link String} for a document.
 *
 * <p>Currently a stub: {@link #Represent(Document)} always returns {@code null}.
 * NOTE(review): presumably meant for documents produced from PDF files, going by
 * the class name — confirm once the implementation is written.
 */
public class PDFDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        return null;
    }
}
package com.search.lucene.documents.representers.implementations;
import com.search.lucene.documents.representers.abstractions.IDocumentRepresenter;
import org.apache.lucene.document.Document;
/**
 * {@link IDocumentRepresenter} that yields a {@link String} for a document.
 *
 * <p>Currently a stub: {@link #Represent(Document)} always returns {@code null}.
 * NOTE(review): presumably meant for documents produced from plain-text files,
 * going by the class name — confirm once the implementation is written.
 */
public class TextDocumentStringRepresenter implements IDocumentRepresenter<String> {

    /**
     * Not implemented yet.
     *
     * @param indexedDocument the document to represent (currently ignored)
     * @return always {@code null}
     */
    @Override
    public String Represent(Document indexedDocument) {
        return null;
    }
}
package com.search.lucene.engine.abstractions;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Entry point for indexing a directory and querying what was indexed.
 */
public interface ISearchEngine {

    /**
     * Builds the index for the given directory.
     *
     * @param directoryPath path of the directory to index
     * @throws IOException if indexing fails on an I/O error
     */
    void createIndexForDirectory(String directoryPath) throws IOException;

    /**
     * Runs a query and returns the matching documents.
     *
     * @param searchQuery the query text
     * @return the documents matching {@code searchQuery}
     * @throws IOException if the search fails on an I/O error
     * @throws ParseException if {@code searchQuery} cannot be parsed
     */
    ArrayList<Document> search(String searchQuery) throws IOException, ParseException;
}
package com.search.lucene.engine.implementations;
import com.search.lucene.engine.abstractions.ISearchEngine;
import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.searchers.abstractions.ISearcher;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import java.io.IOException;
import java.util.ArrayList;
/**
 * {@link ISearchEngine} that delegates indexing to a list of {@link IFileIndexer}s
 * and searching to a list of {@link ISearcher}s.
 */
public class LuceneEngine implements ISearchEngine {
    private final ArrayList<IFileIndexer> indexers;
    private final ArrayList<ISearcher> searchers;

    /**
     * Creates an engine backed by the given indexers and searchers.
     * The lists are stored as-is (not copied).
     *
     * @param indexers indexers invoked, in list order, by {@link #createIndexForDirectory(String)}
     * @param searchers searchers queried, in list order, by {@link #search(String)}
     */
    public LuceneEngine(ArrayList<IFileIndexer> indexers, ArrayList<ISearcher> searchers) {
        this.indexers = indexers;
        this.searchers = searchers;
    }

    /**
     * Runs every configured indexer over the directory.
     *
     * @param directoryPath path of the directory to index
     * @throws IOException if any indexer fails; later indexers are then not run
     */
    @Override
    public void createIndexForDirectory(String directoryPath) throws IOException {
        for (var indexer : indexers) {
            indexer.createIndex(directoryPath);
        }
    }

    /**
     * Runs the query against every configured searcher and collects the documents
     * behind all hits, grouped by searcher in list order.
     *
     * @param searchQuery the query text handed to each searcher
     * @return the matching documents; empty when nothing matches, never {@code null}
     * @throws IOException if a searcher fails on an I/O error
     * @throws ParseException if a searcher cannot parse {@code searchQuery}
     */
    @Override
    public ArrayList<Document> search(String searchQuery) throws IOException, ParseException {
        ArrayList<Document> results = new ArrayList<>();
        for (var searcher : searchers) {
            // Each searcher resolves its own hits: a hit's doc id is only
            // meaningful to the searcher that produced it.
            var hits = searcher.search(searchQuery);
            for (var scoreDoc : hits.scoreDocs) {
                results.add(searcher.getDocument(scoreDoc));
            }
        }
        return results;
    }
}
package com.search.lucene.file.filters;
package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter;
......
package com.search.lucene.file.filters;
package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter;
......
package com.search.lucene.file.filters;
package com.search.lucene.file.filters.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter;
......
......@@ -4,5 +4,5 @@ import java.io.FileFilter;
import java.io.IOException;
public interface IFileIndexer {
int createIndex(String dataDirectoryPath, FileFilter filter) throws IOException;
int createIndex(String dataDirectoryPath) throws IOException;
}
package com.search.lucene.indexers;
package com.search.lucene.indexers.implementations;
import com.opencsv.CSVReader;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
......@@ -23,8 +24,10 @@ import java.util.ArrayList;
public class CSVFileIndexer implements IFileIndexer {
private final IndexWriter writer;
private final IFileFilter filter;
public CSVFileIndexer(String indexDirectoryPath) throws IOException {
public CSVFileIndexer(String indexDirectoryPath, IFileFilter filter) throws IOException {
this.filter = filter;
//this directory will contain the indexes
Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath));
......@@ -53,11 +56,11 @@ public class CSVFileIndexer implements IFileIndexer {
for (int i = 0; i < nextRecord.length; i++) {
Document document = new Document();
document.add(new TextField(LuceneConstants.COLUMN, columns[i], Field.Store.YES));
document.add(new TextField(LuceneConstants.VALUE, nextRecord[i], Field.Store.YES));
document.add(new TextField(LuceneConstants.LINE, Integer.toString(line), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_NAME, file.getName(), Field.Store.YES));
document.add(new TextField(Constants.COLUMN, columns[i], Field.Store.YES));
document.add(new TextField(Constants.VALUE, nextRecord[i], Field.Store.YES));
document.add(new TextField(Constants.ROW, Integer.toString(line), Field.Store.YES));
document.add(new StringField(Constants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(Constants.FILE_NAME, file.getName(), Field.Store.YES));
documents.add(document);
System.out.print(nextRecord[i] + "\t");
}
......@@ -76,7 +79,7 @@ public class CSVFileIndexer implements IFileIndexer {
}
@Override
public int createIndex(String dataDirectoryPath, FileFilter filter)
public int createIndex(String dataDirectoryPath)
throws IOException {
File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.indexers;
package com.search.lucene.indexers.implementations;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
......@@ -17,18 +18,18 @@ import org.apache.pdfbox.text.PDFTextStripper;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
public class PDFFileIndexer implements IFileIndexer {
private final IndexWriter writer;
private final IFileFilter filter;
public PDFFileIndexer(String indexDirectoryPath) throws IOException {
public PDFFileIndexer(String indexDirectoryPath, IFileFilter fileFilter) throws IOException {
//this directory will contain the indexes
Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath));
this.filter = fileFilter;
//create the indexer
writer = new IndexWriter(indexDirectory, new IndexWriterConfig(new StandardAnalyzer()));
}
......@@ -42,9 +43,9 @@ public class PDFFileIndexer implements IFileIndexer {
PDDocument pdDocument = PDDocument.load(file);
String content = new PDFTextStripper().getText(pdDocument);
Document document = new Document();
document.add(new TextField(LuceneConstants.CONTENTS, content, Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(LuceneConstants.FILE_NAME, file.getName(), Field.Store.YES));
document.add(new TextField(Constants.CONTENTS, content, Field.Store.YES));
document.add(new StringField(Constants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES));
document.add(new StringField(Constants.FILE_NAME, file.getName(), Field.Store.YES));
writer.addDocument(document);
pdDocument.close();
......@@ -58,7 +59,7 @@ public class PDFFileIndexer implements IFileIndexer {
}
@Override
public int createIndex(String dataDirectoryPath, FileFilter filter)
public int createIndex(String dataDirectoryPath)
throws IOException {
File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.indexers;
package com.search.lucene.indexers.implementations;
import java.io.File;
import java.io.FileFilter;
......@@ -6,8 +6,9 @@ import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import com.search.lucene.file.filters.abstractions.IFileFilter;
import com.search.lucene.indexers.abstractions.IFileIndexer;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
......@@ -19,8 +20,10 @@ import org.apache.lucene.store.FSDirectory;
public class TextFileIndexer implements IFileIndexer {
private final IndexWriter writer;
private final IFileFilter filter;
public TextFileIndexer(String indexDirectoryPath) throws IOException {
public TextFileIndexer(String indexDirectoryPath, IFileFilter filter) throws IOException {
this.filter = filter;
//this directory will contain the indexes
Directory indexDirectory =
FSDirectory.open(Paths.get(indexDirectoryPath));
......@@ -36,12 +39,12 @@ public class TextFileIndexer implements IFileIndexer {
private Document getDocument(File file) throws IOException {
Document document = new Document();
TextField contentField = new TextField(LuceneConstants.CONTENTS, new FileReader(file));
TextField contentField = new TextField(Constants.CONTENTS, new FileReader(file));
TextField fileNameField = new TextField(LuceneConstants.FILE_NAME,
TextField fileNameField = new TextField(Constants.FILE_NAME,
file.getName(),TextField.Store.YES);
TextField filePathField = new TextField(LuceneConstants.FILE_PATH,
TextField filePathField = new TextField(Constants.FILE_PATH,
file.getCanonicalPath(),TextField.Store.YES);
document.add(contentField);
......@@ -58,7 +61,7 @@ public class TextFileIndexer implements IFileIndexer {
}
@Override
public int createIndex(String dataDirectoryPath, FileFilter filter)
public int createIndex(String dataDirectoryPath)
throws IOException {
File[] files = new File(dataDirectoryPath).listFiles();
......
package com.search.lucene.searchers;
package com.search.lucene.searchers.implementations;
import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
......@@ -31,7 +31,7 @@ public class CSVFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS,
queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer());
}
......@@ -39,7 +39,7 @@ public class CSVFileSearcher implements ISearcher {
public TopDocs search( String searchQuery)
throws IOException, ParseException {
query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
return indexSearcher.search(query, Constants.MAX_SEARCH);
}
@Override
public Document getDocument(ScoreDoc scoreDoc)
......
package com.search.lucene.searchers;
package com.search.lucene.searchers.implementations;
import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
......@@ -31,15 +31,15 @@ public class PDFFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS,
queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer());
}
@Override
public TopDocs search( String searchQuery)
public TopDocs search(String searchQuery)
throws IOException, ParseException {
query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
return indexSearcher.search(query, Constants.MAX_SEARCH);
}
@Override
public Document getDocument(ScoreDoc scoreDoc)
......
package com.search.lucene.searchers;
package com.search.lucene.searchers.implementations;
import java.io.IOException;
import java.nio.file.Paths;
import com.search.lucene.searchers.abstractions.ISearcher;
import com.search.lucene.settings.LuceneConstants;
import com.search.lucene.settings.Constants;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
......@@ -31,20 +31,19 @@ public class TextFileSearcher implements ISearcher {
FSDirectory.open(Paths.get(indexDirectoryPath));
IndexReader reader = DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(reader);
queryParser = new QueryParser(LuceneConstants.CONTENTS,
queryParser = new QueryParser(Constants.CONTENTS,
new StandardAnalyzer());
}
@Override
public TopDocs search( String searchQuery)
public TopDocs search(String searchQuery)
throws IOException, ParseException {
query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
return indexSearcher.search(query, Constants.MAX_SEARCH);
}
/**
 * Fetches the document a search hit refers to.
 *
 * @param scoreDoc the hit whose {@code doc} id is looked up
 * @return the document returned by the index searcher for that id
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if reading the index fails
 */
@Override
public Document getDocument(ScoreDoc scoreDoc)
        throws CorruptIndexException, IOException {
    final int documentId = scoreDoc.doc;
    return indexSearcher.doc(documentId);
}
}
\ No newline at end of file
package com.search.lucene.settings;
public class LuceneConstants {
public class Constants {
public static final String CONTENTS = "contents";
public static final String INDEXED_FROM_FILE_FORMAT = "from_format";
public static final String FILE_NAME = "filename";
public static final String FILE_PATH = "filepath";
public static final String COLUMN = "column";
......
package com.search.lucene.settings;
/**
 * Kinds of index known to the application.
 *
 * <p>NOTE(review): presumably mirrors the CSV, PDF and text indexers; the visible
 * sources do not reference this enum yet — confirm the intended use.
 */
public enum IndexType {
    /** Index type for CSV content. */
    CSV,

    /** Index type for PDF content. */
    PDF,

    /** Index type for text content. */
    TEXT
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment