103 lines
4.0 KiB
Scala
103 lines
4.0 KiB
Scala
package net.abhinavsarkar.ircsearch.lucene
|
|
|
|
import com.typesafe.scalalogging.slf4j.Logging
|
|
import org.apache.lucene.search.IndexSearcher
|
|
import java.io.File
|
|
import org.apache.lucene.index.IndexReader
|
|
import org.apache.lucene.store.FSDirectory
|
|
import org.apache.lucene.analysis.Analyzer
|
|
import org.apache.lucene.queryparser.classic.QueryParser
|
|
import org.apache.lucene.search.Query
|
|
import scala.collection.immutable.Set
|
|
import org.apache.lucene.search.BooleanQuery
|
|
import org.apache.lucene.search.TermQuery
|
|
import org.apache.lucene.search.BooleanClause
|
|
import org.apache.lucene.search.QueryWrapperFilter
|
|
import org.apache.lucene.search.Filter
|
|
import net.abhinavsarkar.ircsearch.model.SearchRequest
|
|
import net.abhinavsarkar.ircsearch.model.SearchResult
|
|
import org.apache.lucene.search.Sort
|
|
import org.apache.lucene.search.SortField
|
|
import scala.collection.JavaConversions._
|
|
import scala.collection.mutable
|
|
import net.abhinavsarkar.ircsearch.model.ChatLine
|
|
import net.abhinavsarkar.ircsearch.model.ChatLine
|
|
import net.abhinavsarkar.ircsearch.model.SearchResult
|
|
import net.abhinavsarkar.ircsearch.model.SearchResult
|
|
|
|
/**
 * Executes search requests against an on-disk Lucene index of chat lines.
 *
 * The only public entry point is [[Searcher.search]]. Every call opens the index
 * directory resolved through `Indexer.getIndexDir`, runs one query and releases the
 * index resources again before returning.
 */
object Searcher extends Logging {

  /**
   * Opens the index stored under `dirPath`, lends an [[IndexSearcher]] over it to
   * `body`, and closes the underlying reader and directory afterwards, even when
   * `body` throws.
   *
   * `body` must fully materialise whatever it needs from the index before returning,
   * because the reader is closed as soon as it completes.
   *
   * @param dirPath path of the index directory; must exist and be a directory
   * @param body    the work to perform with the open searcher
   * @return whatever `body` returns
   */
  private def withIndexSearcher[T](dirPath : String)(body : IndexSearcher => T) : T = {
    val indexDir = new File(dirPath)
    assert(indexDir.exists && indexDir.isDirectory,
      s"Index directory does not exist or is not a directory: $dirPath")

    val directory = FSDirectory.open(indexDir)
    try {
      val reader = IndexReader.open(directory)
      try {
        body(new IndexSearcher(reader))
      } finally {
        reader.close()
      }
    } finally {
      directory.close()
    }
  }

  /** Builds a classic query parser whose default search field is "message". */
  private def mkQueryParser(analyzer : Analyzer) : QueryParser =
    new QueryParser(Indexer.LUCENE_VERSION, "message", analyzer)

  /**
   * Splits a parsed query into a scoring query and an optional mandatory filter.
   *
   * For a top-level [[BooleanQuery]], every non-prohibited [[TermQuery]] clause on one
   * of `mustFields` is moved into a filter in which it is required (MUST). All other
   * clauses are kept, unchanged, in the returned query. Any other kind of query is
   * returned as is, without a filter.
   *
   * @param query      the parsed user query
   * @param mustFields names of the fields whose term clauses become mandatory filters
   * @return the query to score with, and the filter to apply if any clause was moved
   */
  private def filterifyQuery(query : Query, mustFields : Set[String]) : (Query, Option[Filter]) =
    query match {
      case boolQuery : BooleanQuery => {
        import org.apache.lucene.search.MatchAllDocsQuery

        val newQuery = new BooleanQuery
        val filterQuery = new BooleanQuery
        for (clause <- boolQuery.getClauses) {
          clause.getQuery match {
            // Prohibited clauses (e.g. `-user:bob`) must stay prohibited: turning them
            // into a MUST filter would invert what the user asked for.
            case termQuery : TermQuery
              if mustFields.contains(termQuery.getTerm.field) &&
                clause.getOccur != BooleanClause.Occur.MUST_NOT =>
              // Build a fresh clause instead of mutating the parser's own clause.
              filterQuery.add(new BooleanClause(termQuery, BooleanClause.Occur.MUST))
            case _ =>
              newQuery.add(clause)
          }
        }

        if (filterQuery.clauses.isEmpty) {
          (newQuery, None)
        } else {
          // If every clause was moved into the filter, the remaining empty boolean
          // query would match nothing; match all documents so the filter alone decides.
          val scoringQuery : Query =
            if (newQuery.clauses.isEmpty) new MatchAllDocsQuery else newQuery
          (scoringQuery, Some(new QueryWrapperFilter(filterQuery)))
        }
      }
      case _ => (query, None)
    }

  /**
   * Runs the query of `searchRequest` against the index selected by its server,
   * channel and bot name.
   *
   * Term clauses on the "user" field are applied as a mandatory filter rather than
   * as part of the scored query (see `filterifyQuery`). At most
   * `searchRequest.pageSize` chat lines are returned.
   *
   * @param searchRequest the request describing which index to search and what for
   * @return a result derived from the request, carrying the total hit count and the
   *         matching chat lines
   */
  def search(searchRequest : SearchRequest) : SearchResult = {
    logger.debug("Searching : [{} {} {}] {}",
      searchRequest.server, searchRequest.channel, searchRequest.botName, searchRequest.query)

    val indexDir = Indexer.getIndexDir(searchRequest.server, searchRequest.channel, searchRequest.botName)
    val analyzer = Indexer.mkAnalyzer
    try {
      val queryParser = mkQueryParser(analyzer)
      val (query, filter) = filterifyQuery(queryParser.parse(searchRequest.query), Set("user"))
      logger.debug("Query: {}, Filter: {}", query, filter)
      val (totalResults, results) = doSearch(indexDir, query, filter, searchRequest.pageSize)
      val searchResults = SearchResult.fromSearchRequest(searchRequest)
        .copy(totalResults = totalResults, chatLines = results.map(_._1))
      logger.debug("Search results: {}", searchResults)
      searchResults
    } finally {
      analyzer.close()
    }
  }

  /**
   * Searches the index under `indexDir` and converts the top hits into chat lines.
   *
   * Hits are sorted by score first and then by the "timestamp" field as a long, in
   * reverse order. Each hit's stored fields "user", "timestamp" and "message" are
   * read to build the [[ChatLine]]; a missing field fails with
   * `NoSuchElementException`.
   *
   * @param indexDir path of the index directory
   * @param query    the query to run
   * @param filter   optional filter restricting the candidate documents
   * @param maxHits  maximum number of hits to return
   * @return the total number of hits and the returned chat lines paired with the
   *         score reported for each hit
   */
  private def doSearch(indexDir : String, query : Query, filter : Option[Filter], maxHits : Int)
    : (Int, List[(ChatLine, Float)]) =
    withIndexSearcher(indexDir) { indexSearcher =>
      val sort = new Sort(SortField.FIELD_SCORE, new SortField("timestamp", SortField.Type.LONG, true))
      val topDocs = indexSearcher.search(query, filter.orNull, maxHits, sort)
      // NOTE(review): with an explicit Sort, Lucene may leave ScoreDoc.score unset unless
      // score tracking is requested - confirm before relying on the returned scores.
      val docs = topDocs.scoreDocs.map { sd =>
        // Stored fields of the hit, keyed by field name (a later duplicate wins).
        val fields = indexSearcher.doc(sd.doc).getFields
          .map(field => field.name -> field.stringValue)
          .toMap

        val chatLine = new ChatLine(fields("user"), fields("timestamp").toLong, fields("message"))
        (chatLine, sd.score)
      }
      // Array.map and toList are strict, so all documents are read before the reader closes.
      (topDocs.totalHits, docs.toList)
    }

}