Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
L
lucene
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mohamad.alturky
lucene
Commits
82c3cdab
Commit
82c3cdab
authored
Mar 25, 2024
by
mohamad.alturky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adjust engine, add representers resolver and bug fixing
parent
b0aea8ef
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
105 additions
and
138 deletions
+105
-138
Lucene.java
src/main/java/com/search/lucene/Lucene.java
+3
-4
IDocumentRepresenter.java
...ments/representers/abstractions/IDocumentRepresenter.java
+1
-0
CSVDocumentStringRepresenter.java
...senters/implementations/CSVDocumentStringRepresenter.java
+14
-6
PDFDocumentStringRepresenter.java
...senters/implementations/PDFDocumentStringRepresenter.java
+7
-1
TextDocumentStringRepresenter.java
...enters/implementations/TextDocumentStringRepresenter.java
+7
-1
DocumentRepresenterResolver.java
...ts/representers/resolver/DocumentRepresenterResolver.java
+19
-0
LuceneEngineBuilder.java
...e/engine/builder/implementations/LuceneEngineBuilder.java
+8
-7
LuceneEngine.java
...om/search/lucene/engine/implementations/LuceneEngine.java
+13
-1
CSVFileIndexer.java
...earch/lucene/indexers/implementations/CSVFileIndexer.java
+10
-1
PDFFileIndexer.java
...earch/lucene/indexers/implementations/PDFFileIndexer.java
+3
-2
TextFileIndexer.java
...arch/lucene/indexers/implementations/TextFileIndexer.java
+4
-2
ISearcher.java
...a/com/search/lucene/searchers/abstractions/ISearcher.java
+1
-1
CSVFileSearcher.java
...rch/lucene/searchers/implementations/CSVFileSearcher.java
+0
-50
IndexedDocumentSearcher.java
...ne/searchers/implementations/IndexedDocumentSearcher.java
+10
-10
TextFileSearcher.java
...ch/lucene/searchers/implementations/TextFileSearcher.java
+0
-49
Constants.java
src/main/java/com/search/lucene/settings/Constants.java
+2
-2
RepresentationSchema.java
...java/com/search/lucene/settings/RepresentationSchema.java
+3
-1
No files found.
src/main/java/com/search/lucene/Lucene.java
View file @
82c3cdab
...
...
@@ -2,9 +2,8 @@ package com.search.lucene;
import
java.io.IOException
;
import
com.search.lucene.file.filters.implementations.TextFileFilter
;
import
com.search.lucene.indexers.implementations.TextFileIndexer
;
import
com.search.lucene.searchers.implementations.
TextFile
Searcher
;
import
com.search.lucene.searchers.implementations.
IndexedDocument
Searcher
;
import
com.search.lucene.settings.Constants
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.queryparser.classic.ParseException
;
...
...
@@ -16,7 +15,7 @@ public class Lucene {
private
static
final
String
indexDir
=
"index"
;
private
static
final
String
dataDir
=
"data"
;
private
static
TextFileIndexer
indexer
;
private
static
TextFile
Searcher
textFileSearcher
;
private
static
IndexedDocument
Searcher
textFileSearcher
;
public
static
void
main
(
String
[]
args
)
{
try
{
...
...
@@ -42,7 +41,7 @@ public class Lucene {
}
private
static
void
search
(
String
searchQuery
)
throws
IOException
,
ParseException
{
textFileSearcher
=
new
TextFileSearcher
(
indexDir
);
textFileSearcher
=
new
IndexedDocumentSearcher
(
indexDir
,
Constants
.
CONTENT
);
long
startTime
=
System
.
currentTimeMillis
();
TopDocs
hits
=
textFileSearcher
.
search
(
searchQuery
);
long
endTime
=
System
.
currentTimeMillis
();
...
...
src/main/java/com/search/lucene/documents/representers/abstractions/IDocumentRepresenter.java
View file @
82c3cdab
...
...
@@ -4,4 +4,5 @@ import org.apache.lucene.document.Document;
/**
 * Turns a Lucene {@link Document} into a representation of type {@code T}.
 *
 * @param <T> the type of representation produced for a document
 */
public interface IDocumentRepresenter<T> {

    /**
     * Builds the representation of the given document.
     *
     * @param indexedDocument the document to represent
     * @return the representation of {@code indexedDocument}
     */
    T Represent(Document indexedDocument);

    /**
     * Names the representation schema this representer handles.
     *
     * @return the schema identifier
     */
    String representationSchema();
}
src/main/java/com/search/lucene/documents/representers/implementations/CSVDocumentStringRepresenter.java
View file @
82c3cdab
...
...
@@ -2,22 +2,30 @@ package com.search.lucene.documents.representers.implementations;
import
com.search.lucene.documents.representers.abstractions.IDocumentRepresenter
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.document.Document
;
public
class
CSVDocumentStringRepresenter
implements
IDocumentRepresenter
<
String
>
{
@Override
public
String
Represent
(
Document
indexedDocument
)
{
return
String
.
format
(
"""
file path = %s\s
file name = %s\s
in row = %s\s
in column = %s\s
with content = %s\s
file path is %s\s
file name is %s\s
row number is %s\s
column name is %s\s
content is %s\s
csv row is %s\s
"""
,
indexedDocument
.
get
(
Constants
.
FILE_PATH
),
indexedDocument
.
get
(
Constants
.
FILE_NAME
),
indexedDocument
.
get
(
Constants
.
ROW
),
indexedDocument
.
get
(
Constants
.
COLUMN
),
indexedDocument
.
get
(
Constants
.
VALUE
));
indexedDocument
.
get
(
Constants
.
VALUE
),
indexedDocument
.
get
(
Constants
.
CSV_ROW
));
}
@Override
public
String
representationSchema
()
{
return
RepresentationSchema
.
CSV
;
}
}
src/main/java/com/search/lucene/documents/representers/implementations/PDFDocumentStringRepresenter.java
View file @
82c3cdab
...
...
@@ -2,6 +2,7 @@ package com.search.lucene.documents.representers.implementations;
import
com.search.lucene.documents.representers.abstractions.IDocumentRepresenter
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.document.Document
;
public
class
PDFDocumentStringRepresenter
implements
IDocumentRepresenter
<
String
>
{
...
...
@@ -14,6 +15,11 @@ public class PDFDocumentStringRepresenter implements IDocumentRepresenter<String
"""
,
indexedDocument
.
get
(
Constants
.
FILE_PATH
),
indexedDocument
.
get
(
Constants
.
FILE_NAME
),
indexedDocument
.
get
(
Constants
.
CONTENTS
));
indexedDocument
.
get
(
Constants
.
CONTENT
));
}
/**
 * Identifies this representer as the one for PDF documents.
 *
 * @return {@link RepresentationSchema#PDF}
 */
@Override
public String representationSchema() {
    return RepresentationSchema.PDF;
}
}
src/main/java/com/search/lucene/documents/representers/implementations/TextDocumentStringRepresenter.java
View file @
82c3cdab
...
...
@@ -2,6 +2,7 @@ package com.search.lucene.documents.representers.implementations;
import
com.search.lucene.documents.representers.abstractions.IDocumentRepresenter
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.document.Document
;
public
class
TextDocumentStringRepresenter
implements
IDocumentRepresenter
<
String
>
{
...
...
@@ -14,6 +15,11 @@ public class TextDocumentStringRepresenter implements IDocumentRepresenter<Strin
"""
,
indexedDocument
.
get
(
Constants
.
FILE_PATH
),
indexedDocument
.
get
(
Constants
.
FILE_NAME
),
indexedDocument
.
get
(
Constants
.
CONTENTS
));
indexedDocument
.
get
(
Constants
.
CONTENT
));
}
/**
 * Identifies this representer as the one for plain-text documents.
 *
 * @return {@link RepresentationSchema#TEXT}
 */
@Override
public String representationSchema() {
    return RepresentationSchema.TEXT;
}
}
src/main/java/com/search/lucene/documents/representers/resolver/DocumentRepresenterResolver.java
0 → 100644
View file @
82c3cdab
package
com
.
search
.
lucene
.
documents
.
representers
.
resolver
;
import
com.search.lucene.documents.representers.abstractions.IDocumentRepresenter
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.document.Document
;
import
java.util.HashMap
;
/**
 * Picks the {@link IDocumentRepresenter} registered for a document's stored
 * {@link RepresentationSchema#TYPE} value and delegates the representation to it.
 *
 * @param <T> the type of representation produced by the registered representers
 */
public class DocumentRepresenterResolver<T> {

    /** Registered representers, keyed by the value stored in the document's type field. */
    private final HashMap<String, IDocumentRepresenter<T>> hashMap;

    /**
     * Creates a resolver backed by the given registry.
     *
     * @param hashMap representers keyed by type value; must not be {@code null}
     * @throws NullPointerException if {@code hashMap} is {@code null}
     */
    public DocumentRepresenterResolver(HashMap<String, IDocumentRepresenter<T>> hashMap) {
        // Fail at construction time rather than on the first lookup.
        if (hashMap == null) {
            throw new NullPointerException("hashMap must not be null");
        }
        this.hashMap = hashMap;
    }

    /**
     * Represents the document using the representer registered for its type.
     *
     * @param document the document to represent; its {@link RepresentationSchema#TYPE}
     *                 field selects the representer
     * @return the representation produced by the selected representer
     * @throws IllegalArgumentException if the document has no type field or no
     *                                  representer is registered for its value
     */
    public T resolveRepresentation(Document document) {
        String type = document.get(RepresentationSchema.TYPE);
        IDocumentRepresenter<T> representer = hashMap.get(type);
        if (representer == null) {
            // Previously this surfaced as a message-less NullPointerException.
            throw new IllegalArgumentException(
                    "No document representer registered for type: " + type);
        }
        return representer.Represent(document);
    }
}
src/main/java/com/search/lucene/engine/builder/implementations/LuceneEngineBuilder.java
View file @
82c3cdab
package
com
.
search
.
lucene
.
engine
.
builder
.
implementations
;
import
com.search.lucene.documents.representers.resolver.DocumentRepresenterResolver
;
import
com.search.lucene.engine.abstractions.ISearchEngine
;
import
com.search.lucene.engine.builder.abstractions.IEngineBuilder
;
import
com.search.lucene.engine.implementations.LuceneEngine
;
...
...
@@ -8,13 +9,14 @@ import com.search.lucene.indexers.implementations.CSVFileIndexer;
import
com.search.lucene.indexers.implementations.PDFFileIndexer
;
import
com.search.lucene.indexers.implementations.TextFileIndexer
;
import
com.search.lucene.searchers.abstractions.ISearcher
;
import
com.search.lucene.searchers.implementations.CSVFileSearcher
;
import
com.search.lucene.searchers.implementations.
PDFFile
Searcher
;
import
com.search.lucene.se
archers.implementations.TextFileSearcher
;
import
com.search.lucene.searchers.implementations.
IndexedDocument
Searcher
;
import
com.search.lucene.se
ttings.Constants
;
import
com.search.lucene.settings.IndexPaths
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
public
class
LuceneEngineBuilder
implements
IEngineBuilder
{
@Override
...
...
@@ -26,10 +28,9 @@ public class LuceneEngineBuilder implements IEngineBuilder {
indexers
.
add
(
new
TextFileIndexer
(
IndexPaths
.
TEXT_INDEX_FOLDER_PATH
));
ArrayList
<
ISearcher
>
searchers
=
new
ArrayList
<>();
searchers
.
add
(
new
CSVFileSearcher
(
IndexPaths
.
CSV_INDEX_FOLDER_PATH
));
searchers
.
add
(
new
PDFFileSearcher
(
IndexPaths
.
PDF_INDEX_FOLDER_PATH
));
searchers
.
add
(
new
TextFileSearcher
(
IndexPaths
.
TEXT_INDEX_FOLDER_PATH
));
searchers
.
add
(
new
IndexedDocumentSearcher
(
IndexPaths
.
CSV_INDEX_FOLDER_PATH
,
Constants
.
VALUE
));
searchers
.
add
(
new
IndexedDocumentSearcher
(
IndexPaths
.
PDF_INDEX_FOLDER_PATH
,
Constants
.
CONTENT
));
searchers
.
add
(
new
IndexedDocumentSearcher
(
IndexPaths
.
TEXT_INDEX_FOLDER_PATH
,
Constants
.
CONTENT
));
return
new
LuceneEngine
(
indexers
,
searchers
);
}
...
...
src/main/java/com/search/lucene/engine/implementations/LuceneEngine.java
View file @
82c3cdab
package
com
.
search
.
lucene
.
engine
.
implementations
;
import
com.search.lucene.documents.representers.resolver.DocumentRepresenterResolver
;
import
com.search.lucene.engine.abstractions.ISearchEngine
;
import
com.search.lucene.indexers.abstractions.IFileIndexer
;
import
com.search.lucene.searchers.abstractions.ISearcher
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.search.ScoreDoc
;
import
org.apache.lucene.search.TopDocs
;
import
java.io.IOException
;
import
java.util.ArrayList
;
...
...
@@ -28,6 +31,15 @@ public class LuceneEngine implements ISearchEngine {
/**
 * Runs the query against every configured searcher and collects the matching documents.
 *
 * <p>Results are appended searcher by searcher, each in the order of that
 * searcher's {@code scoreDocs}; no cross-searcher re-ranking is performed.
 *
 * @param searchQuery the query text passed unchanged to each searcher
 * @return the documents for all hits of all searchers; empty when nothing matches
 * @throws IOException    if a searcher fails while searching or loading a document
 * @throws ParseException if a searcher cannot parse {@code searchQuery}
 */
@Override
public ArrayList<Document> search(String searchQuery) throws IOException, ParseException {
    ArrayList<Document> documents = new ArrayList<>();
    for (ISearcher searcher : searchers) {
        TopDocs hits = searcher.search(searchQuery);
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            // A hit must be loaded through the searcher that produced it.
            documents.add(searcher.getDocument(scoreDoc));
        }
    }
    return documents;
}
}
src/main/java/com/search/lucene/indexers/implementations/CSVFileIndexer.java
View file @
82c3cdab
...
...
@@ -5,6 +5,7 @@ import com.search.lucene.file.filters.abstractions.IFileFilter;
import
com.search.lucene.file.filters.implementations.CSVFileFilter
;
import
com.search.lucene.indexers.abstractions.IFileIndexer
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.document.Field
;
...
...
@@ -17,7 +18,6 @@ import org.apache.lucene.store.Directory;
import
org.apache.lucene.store.FSDirectory
;
import
java.io.File
;
import
java.io.FileFilter
;
import
java.io.FileReader
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
...
...
@@ -55,15 +55,24 @@ public class CSVFileIndexer implements IFileIndexer {
int
line
=
1
;
while
((
nextRecord
=
csvReader
.
readNext
())
!=
null
)
{
line
++;
StringBuilder
csvRowStringBuilder
=
new
StringBuilder
();
for
(
String
s
:
nextRecord
)
{
csvRowStringBuilder
.
append
(
s
);
}
String
csvRow
=
csvRowStringBuilder
.
toString
();
for
(
int
i
=
0
;
i
<
nextRecord
.
length
;
i
++)
{
Document
document
=
new
Document
();
document
.
add
(
new
TextField
(
Constants
.
COLUMN
,
columns
[
i
],
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
RepresentationSchema
.
TYPE
,
RepresentationSchema
.
CSV
,
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
Constants
.
CSV_ROW
,
csvRow
,
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
Constants
.
VALUE
,
nextRecord
[
i
],
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
Constants
.
ROW
,
Integer
.
toString
(
line
),
Field
.
Store
.
YES
));
document
.
add
(
new
StringField
(
Constants
.
FILE_PATH
,
file
.
getCanonicalPath
(),
Field
.
Store
.
YES
));
document
.
add
(
new
StringField
(
Constants
.
FILE_NAME
,
file
.
getName
(),
Field
.
Store
.
YES
));
documents
.
add
(
document
);
System
.
out
.
print
(
nextRecord
[
i
]
+
"\t"
);
}
System
.
out
.
println
();
...
...
src/main/java/com/search/lucene/indexers/implementations/PDFFileIndexer.java
View file @
82c3cdab
...
...
@@ -4,6 +4,7 @@ import com.search.lucene.file.filters.abstractions.IFileFilter;
import
com.search.lucene.file.filters.implementations.PDFFileFilter
;
import
com.search.lucene.indexers.abstractions.IFileIndexer
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.document.TextField
;
...
...
@@ -18,7 +19,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import
org.apache.pdfbox.text.PDFTextStripper
;
import
java.io.File
;
import
java.io.FileFilter
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
...
...
@@ -44,7 +44,8 @@ public class PDFFileIndexer implements IFileIndexer {
PDDocument
pdDocument
=
PDDocument
.
load
(
file
);
String
content
=
new
PDFTextStripper
().
getText
(
pdDocument
);
Document
document
=
new
Document
();
document
.
add
(
new
TextField
(
Constants
.
CONTENTS
,
content
,
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
Constants
.
CONTENT
,
content
,
Field
.
Store
.
YES
));
document
.
add
(
new
TextField
(
RepresentationSchema
.
TYPE
,
RepresentationSchema
.
PDF
,
Field
.
Store
.
YES
));
document
.
add
(
new
StringField
(
Constants
.
FILE_PATH
,
file
.
getCanonicalPath
(),
Field
.
Store
.
YES
));
document
.
add
(
new
StringField
(
Constants
.
FILE_NAME
,
file
.
getName
(),
Field
.
Store
.
YES
));
writer
.
addDocument
(
document
);
...
...
src/main/java/com/search/lucene/indexers/implementations/TextFileIndexer.java
View file @
82c3cdab
package
com
.
search
.
lucene
.
indexers
.
implementations
;
import
java.io.File
;
import
java.io.FileFilter
;
import
java.io.FileReader
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
...
...
@@ -10,8 +9,10 @@ import com.search.lucene.file.filters.abstractions.IFileFilter;
import
com.search.lucene.file.filters.implementations.TextFileFilter
;
import
com.search.lucene.indexers.abstractions.IFileIndexer
;
import
com.search.lucene.settings.Constants
;
import
com.search.lucene.settings.RepresentationSchema
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.document.Field
;
import
org.apache.lucene.document.TextField
;
import
org.apache.lucene.index.CorruptIndexException
;
import
org.apache.lucene.index.IndexWriter
;
...
...
@@ -40,13 +41,14 @@ public class TextFileIndexer implements IFileIndexer {
private
Document
getDocument
(
File
file
)
throws
IOException
{
Document
document
=
new
Document
();
TextField
contentField
=
new
TextField
(
Constants
.
CONTENT
S
,
new
FileReader
(
file
));
TextField
contentField
=
new
TextField
(
Constants
.
CONTENT
,
new
FileReader
(
file
));
TextField
fileNameField
=
new
TextField
(
Constants
.
FILE_NAME
,
file
.
getName
(),
TextField
.
Store
.
YES
);
TextField
filePathField
=
new
TextField
(
Constants
.
FILE_PATH
,
file
.
getCanonicalPath
(),
TextField
.
Store
.
YES
);
document
.
add
(
new
TextField
(
RepresentationSchema
.
TYPE
,
RepresentationSchema
.
TEXT
,
Field
.
Store
.
YES
));
document
.
add
(
contentField
);
document
.
add
(
fileNameField
);
...
...
src/main/java/com/search/lucene/searchers/abstractions/ISearcher.java
View file @
82c3cdab
...
...
@@ -10,6 +10,6 @@ import java.io.IOException;
/**
 * Searches an index and loads the documents behind its hits.
 */
public interface ISearcher {

    /**
     * Loads the document referenced by a search hit.
     *
     * @param scoreDoc a hit returned by {@link #search(String)}
     * @return the document the hit refers to
     * @throws IOException if the document cannot be read; this also covers
     *                     Lucene's {@code CorruptIndexException}, a subclass
     */
    Document getDocument(ScoreDoc scoreDoc) throws IOException;

    /**
     * Executes the given query text.
     *
     * @param searchQuery the query text to parse and run
     * @return the hits for the query
     * @throws IOException    if the index cannot be read
     * @throws ParseException if {@code searchQuery} cannot be parsed
     */
    TopDocs search(String searchQuery) throws IOException, ParseException;
}
src/main/java/com/search/lucene/searchers/implementations/CSVFileSearcher.java
deleted
100644 → 0
View file @
b0aea8ef
package
com
.
search
.
lucene
.
searchers
.
implementations
;
import
com.search.lucene.searchers.abstractions.ISearcher
;
import
com.search.lucene.settings.Constants
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.index.CorruptIndexException
;
import
org.apache.lucene.index.DirectoryReader
;
import
org.apache.lucene.index.IndexReader
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.IndexSearcher
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.search.ScoreDoc
;
import
org.apache.lucene.search.TopDocs
;
import
org.apache.lucene.store.Directory
;
import
org.apache.lucene.store.FSDirectory
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
/**
 * Searches the index built from CSV files.
 *
 * <p>NOTE(review): the {@link IndexReader} opened by the constructor is never
 * closed; this class exposes no close hook, so the reader lives as long as the
 * searcher does.
 */
public class CSVFileSearcher implements ISearcher {
    IndexSearcher indexSearcher;
    QueryParser queryParser;
    Query query;

    /**
     * Opens the index stored in the given directory.
     *
     * @param indexDirectoryPath path of the directory holding the CSV index
     * @throws IOException if the directory or the index cannot be opened
     */
    public CSVFileSearcher(String indexDirectoryPath) throws IOException {
        Directory indexDirectory = FSDirectory.open(Paths.get(indexDirectoryPath));
        IndexReader reader = DirectoryReader.open(indexDirectory);
        indexSearcher = new IndexSearcher(reader);
        // CSV documents store the cell text under VALUE; they have no CONTENTS
        // field, so VALUE has to be the parser's default field for queries to match.
        queryParser = new QueryParser(Constants.VALUE, new StandardAnalyzer());
    }

    /**
     * Parses and runs the query, returning at most {@code Constants.MAX_SEARCH} hits.
     *
     * @param searchQuery the query text
     * @return the hits for the query
     * @throws IOException    if the index cannot be read
     * @throws ParseException if {@code searchQuery} cannot be parsed
     */
    @Override
    public TopDocs search(String searchQuery) throws IOException, ParseException {
        query = queryParser.parse(searchQuery);
        return indexSearcher.search(query, Constants.MAX_SEARCH);
    }

    /**
     * Loads the document referenced by a search hit.
     *
     * @param scoreDoc a hit returned by {@link #search(String)}
     * @return the document the hit refers to
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the document cannot be read
     */
    @Override
    public Document getDocument(ScoreDoc scoreDoc) throws CorruptIndexException, IOException {
        return indexSearcher.doc(scoreDoc.doc);
    }
}
\ No newline at end of file
src/main/java/com/search/lucene/searchers/implementations/
PDFFile
Searcher.java
→
src/main/java/com/search/lucene/searchers/implementations/
IndexedDocument
Searcher.java
View file @
82c3cdab
package
com
.
search
.
lucene
.
searchers
.
implementations
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
import
com.search.lucene.documents.representers.abstractions.IDocumentRepresenter
;
import
com.search.lucene.searchers.abstractions.ISearcher
;
import
com.search.lucene.settings.Constants
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
...
...
@@ -16,29 +20,26 @@ import org.apache.lucene.search.TopDocs;
import
org.apache.lucene.store.Directory
;
import
org.apache.lucene.store.FSDirectory
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
public
class
IndexedDocumentSearcher
implements
ISearcher
{
public
class
PDFFileSearcher
implements
ISearcher
{
IndexSearcher
indexSearcher
;
QueryParser
queryParser
;
Query
query
;
private
final
IndexSearcher
indexSearcher
;
private
final
QueryParser
queryParser
;
public
PDFFileSearcher
(
String
indexDirectoryPath
)
public
IndexedDocumentSearcher
(
String
indexDirectoryPath
,
String
queryForField
)
throws
IOException
{
Directory
indexDirectory
=
FSDirectory
.
open
(
Paths
.
get
(
indexDirectoryPath
));
IndexReader
reader
=
DirectoryReader
.
open
(
indexDirectory
);
indexSearcher
=
new
IndexSearcher
(
reader
);
queryParser
=
new
QueryParser
(
Constants
.
CONTENTS
,
queryParser
=
new
QueryParser
(
queryForField
,
new
StandardAnalyzer
());
}
@Override
public
TopDocs
search
(
String
searchQuery
)
throws
IOException
,
ParseException
{
query
=
queryParser
.
parse
(
searchQuery
);
Query
query
=
queryParser
.
parse
(
searchQuery
);
return
indexSearcher
.
search
(
query
,
Constants
.
MAX_SEARCH
);
}
@Override
...
...
@@ -46,5 +47,4 @@ public class PDFFileSearcher implements ISearcher {
throws
CorruptIndexException
,
IOException
{
return
indexSearcher
.
doc
(
scoreDoc
.
doc
);
}
}
\ No newline at end of file
src/main/java/com/search/lucene/searchers/implementations/TextFileSearcher.java
deleted
100644 → 0
View file @
b0aea8ef
package
com
.
search
.
lucene
.
searchers
.
implementations
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
import
com.search.lucene.searchers.abstractions.ISearcher
;
import
com.search.lucene.settings.Constants
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.index.CorruptIndexException
;
import
org.apache.lucene.index.DirectoryReader
;
import
org.apache.lucene.index.IndexReader
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.IndexSearcher
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.search.ScoreDoc
;
import
org.apache.lucene.search.TopDocs
;
import
org.apache.lucene.store.Directory
;
import
org.apache.lucene.store.FSDirectory
;
/**
 * Searches the index built from plain-text files, using
 * {@code Constants.CONTENTS} as the default query field.
 */
public class TextFileSearcher implements ISearcher {
    IndexSearcher indexSearcher;
    QueryParser queryParser;
    Query query;

    /**
     * Opens the index stored in the given directory and prepares the query parser.
     *
     * @param indexDirectoryPath path of the directory holding the text index
     * @throws IOException if the directory or the index cannot be opened
     */
    public TextFileSearcher(String indexDirectoryPath) throws IOException {
        this.indexSearcher = new IndexSearcher(
                DirectoryReader.open(FSDirectory.open(Paths.get(indexDirectoryPath))));
        this.queryParser = new QueryParser(Constants.CONTENTS, new StandardAnalyzer());
    }

    /**
     * Parses the query text and runs it, returning at most
     * {@code Constants.MAX_SEARCH} hits.
     *
     * @param searchQuery the query text
     * @return the hits for the query
     * @throws IOException    if the index cannot be read
     * @throws ParseException if {@code searchQuery} cannot be parsed
     */
    @Override
    public TopDocs search(String searchQuery) throws IOException, ParseException {
        this.query = this.queryParser.parse(searchQuery);
        TopDocs hits = this.indexSearcher.search(this.query, Constants.MAX_SEARCH);
        return hits;
    }

    /**
     * Loads the document referenced by a search hit.
     *
     * @param scoreDoc a hit returned by {@link #search(String)}
     * @return the document the hit refers to
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the document cannot be read
     */
    @Override
    public Document getDocument(ScoreDoc scoreDoc) throws CorruptIndexException, IOException {
        int docId = scoreDoc.doc;
        return this.indexSearcher.doc(docId);
    }
}
\ No newline at end of file
src/main/java/com/search/lucene/settings/Constants.java
View file @
82c3cdab
...
...
@@ -2,8 +2,8 @@ package com.search.lucene.settings;
public
class
Constants
{
public
static
final
String
CONTENT
S
=
"contents
"
;
public
static
final
String
INDEXED_FROM_FILE_FORMAT
=
"from_format
"
;
public
static
final
String
CONTENT
=
"content
"
;
public
static
final
String
CSV_ROW
=
"csv_row
"
;
public
static
final
String
FILE_NAME
=
"filename"
;
public
static
final
String
FILE_PATH
=
"filepath"
;
public
static
final
String
COLUMN
=
"column"
;
...
...
src/main/java/com/search/lucene/settings/
IndexType
.java
→
src/main/java/com/search/lucene/settings/
RepresentationSchema
.java
View file @
82c3cdab
package
com
.
search
.
lucene
.
settings
;
public
final
class
IndexType
{
public
final
class
RepresentationSchema
{
public
static
final
String
TYPE
=
"type"
;
public
static
final
String
CSV
=
"CSV"
;
public
static
final
String
PDF
=
"PDF"
;
public
static
final
String
TEXT
=
"TEXT"
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment