// irc-search/src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala
//
// 172 lines
// 6.1 KiB
// Scala
// Raw Normal View History

package net.abhinavsarkar.ircsearch.lucene
import java.io.File
import java.text.ParseException
import java.text.SimpleDateFormat
import scala.collection.JavaConversions._
import scala.collection.immutable.Map
import scala.collection.mutable
import scala.collection.mutable.Buffer
import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.queries.ChainedFilter
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser
import org.apache.lucene.search.BooleanClause
import org.apache.lucene.search.BooleanQuery
import org.apache.lucene.search.Filter
import org.apache.lucene.search.FilteredQuery
import org.apache.lucene.search.NumericRangeFilter
import org.apache.lucene.search.Query
import org.apache.lucene.search.QueryWrapperFilter
import org.apache.lucene.search.SearcherFactory
import org.apache.lucene.search.SearcherManager
import org.apache.lucene.search.Sort
import org.apache.lucene.search.SortField
import org.apache.lucene.search.TermQuery
import org.apache.lucene.store.FSDirectory
import com.typesafe.scalalogging.slf4j.Logging
import net.abhinavsarkar.ircsearch.model._
object Searcher extends Logging {
/** Upper bound on the number of hits requested from Lucene for a single search. */
val MaxHits : Int = 1000

/** Boost given to the message field when the multi-field query parser is built. */
val MessageFieldBoost : java.lang.Float = java.lang.Float.valueOf(2.0f)

/** Searcher managers opened so far, keyed by the index directory path they serve. */
private val searcherMgrs = mutable.Map[String, SearcherManager]()
/**
 * Closes every cached searcher manager and empties the cache.
 *
 * The work is done while holding the `Searcher` monitor, the same one
 * `getSearcherMgr` takes before adding to the cache, so the map is never
 * iterated while another thread is inserting into it.
 */
def close : Unit = synchronized {
  searcherMgrs.values.foreach(_.close())
  // Forget the closed managers: a closed SearcherManager refuses further
  // acquires, so keeping it cached would break every later search on that index.
  searcherMgrs.clear()
  logger.info("Closed Searcher")
}
/**
 * Returns the searcher manager for the index stored at `dirPath`, opening and
 * caching it the first time that path is requested.
 *
 * Both the lookup and the insertion happen under the `Searcher` monitor, so the
 * underlying mutable map is never read while another thread is writing to it.
 *
 * @param dirPath path of an existing Lucene index directory
 * @return the cached (or newly opened) manager for that directory
 */
private def getSearcherMgr(dirPath : String) : SearcherManager =
  synchronized {
    searcherMgrs.getOrElseUpdate(dirPath, {
      val indexDir = new File(dirPath)
      assert(indexDir.exists && indexDir.isDirectory,
        "Index directory does not exist or is not a directory: " + dirPath)
      new SearcherManager(FSDirectory.open(indexDir), new SearcherFactory)
    })
  }
/**
 * Builds a query parser over the message field and the two context fields,
 * with the message field boosted by `MessageFieldBoost`.
 */
private def mkQueryParser(analyzer : Analyzer) = {
  val searchFields : Array[String] = Array(ChatLine.MSG, ChatLine.CTXB, ChatLine.CTXA)
  // The Scala map is turned into a java.util.Map by the JavaConversions implicits.
  val fieldBoosts : java.util.Map[String, java.lang.Float] =
    Map(ChatLine.MSG -> MessageFieldBoost)
  new MultiFieldQueryParser(Indexer.LUCENE_VERSION, searchFields, analyzer, fieldBoosts)
}
/**
 * Moves the metadata clauses of a parsed boolean query into Lucene filters.
 *
 * Only the top-level `TermQuery` clauses of a `BooleanQuery` are inspected:
 *  - a clause on the `ChatLine.USER` field becomes a required term filter;
 *  - `before:yyMMdd` and `after:yyMMdd` become inclusive range filters on
 *    `ChatLine.TS`; a value that does not parse is logged and the clause dropped;
 *  - every other clause is kept, untouched, in the scoring query.
 *
 * Any other kind of query is returned as is.
 *
 * @param query the query produced by the query parser
 * @return the scoring query, wrapped in a `FilteredQuery` when at least one
 *         filter was extracted
 */
private def filterifyQuery(query : Query) : Query =
  query match {
    case boolQuery : BooleanQuery =>
      val scoringQuery = new BooleanQuery
      val filters = Buffer[Filter]()
      // SimpleDateFormat is not thread-safe, so it stays local to this call;
      // one instance serves every clause of the query.
      val dateFormat = new SimpleDateFormat("yyMMdd")

      // Epoch millis of a yyMMdd term, or None (with a warning) when it does not parse.
      def parseDate(text : String) : Option[Long] =
        try {
          Some(dateFormat.parse(text).getTime)
        } catch {
          case _ : ParseException =>
            logger.warn("Ignoring unparseable date in query: {}", text)
            None
        }

      for (clause <- boolQuery.getClauses) {
        clause.getQuery match {
          case termQuery : TermQuery =>
            val term = termQuery.getTerm
            term.field match {
              case ChatLine.USER =>
                // Build a fresh MUST clause instead of mutating the caller's clause.
                val filterQuery = new BooleanQuery
                filterQuery.add(termQuery, BooleanClause.Occur.MUST)
                filters += new QueryWrapperFilter(filterQuery)
              case "before" =>
                parseDate(term.text).foreach { ts =>
                  filters += NumericRangeFilter.newLongRange(
                    ChatLine.TS, 0L, ts, true, true)
                }
              case "after" =>
                parseDate(term.text).foreach { ts =>
                  filters += NumericRangeFilter.newLongRange(
                    ChatLine.TS, ts, java.lang.Long.MAX_VALUE, true, true)
                }
              case _ => scoringQuery.add(clause)
            }
          case _ => scoringQuery.add(clause)
        }
      }

      if (filters.isEmpty) {
        scoringQuery
      } else {
        // A BooleanQuery without clauses matches no document, so a query made
        // only of filter terms (e.g. "user:x after:130101") must filter over
        // all documents instead of over an empty result.
        val baseQuery : Query =
          if (scoringQuery.getClauses.isEmpty) new org.apache.lucene.search.MatchAllDocsQuery
          else scoringQuery
        new FilteredQuery(baseQuery, new ChainedFilter(filters.toArray, ChainedFilter.AND))
      }
    case _ => query
  }
/**
 * Executes a search request against the index of the request's
 * server / channel / bot and returns the matching chat lines.
 *
 * The request's query string is parsed with the multi-field parser, its
 * metadata clauses are turned into filters, and the requested page is fetched.
 * The analyzer created for the request is closed whether or not the search succeeds.
 *
 * @param searchRequest the index coordinates, query string and paging parameters
 * @return a result derived from the request, carrying the total hit count and
 *         the chat lines of the requested page
 */
def search(searchRequest : SearchRequest) : SearchResult = {
  logger.debug("Searching : [{} {} {}] {}",
    searchRequest.server, searchRequest.channel, searchRequest.botName, searchRequest.query)

  val indexDir =
    Indexer.getIndexDir(searchRequest.server, searchRequest.channel, searchRequest.botName)
  val analyzer = Indexer.mkAnalyzer
  try {
    val parsedQuery = mkQueryParser(analyzer).parse(searchRequest.query)
    val query = filterifyQuery(parsedQuery)
    logger.debug("Query: {}", query)

    val (hitCount, scoredLines) =
      doSearch(indexDir, query, searchRequest.page, searchRequest.pageSize)

    // Scores are only needed for ordering inside doSearch; the result carries the lines alone.
    val emptyResult = SearchResult.fromSearchRequest(searchRequest)
    val searchResults = emptyResult.copy(
      totalResults = hitCount,
      chatLines = scoredLines.map { case (line, _) => line })
    logger.debug("Search results: {}", searchResults)
    searchResults
  } finally {
    analyzer.close
  }
}
/** Stored document fields, in the order `doSearch` destructures them. */
private val DocFields = List(ChatLine.USER, ChatLine.TS, ChatLine.MSG, ChatLine.CTXB, ChatLine.CTXA)

/** Shape of one stored context line: `<timestamp> <user>: <message>`. Compiled once. */
private val ContextLineRe = "(\\d+) (.*?): (.*)".r

/**
 * Runs `query` against the index in `indexDir` and returns one page of hits.
 *
 * Hits are ordered by score and then by timestamp, newest first. At most
 * `MaxHits` hits are ever requested from Lucene, so pages beyond that window
 * come back empty.
 *
 * @param indexDir path of the Lucene index directory
 * @param query    the query to run
 * @param page     zero-based page number
 * @param pageSize number of hits per page
 * @return the total number of hits and the chat lines of the requested page,
 *         each paired with the score reported for its hit
 */
private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
    : (Int, List[(ChatLine, Float)]) = {
  // Paging arithmetic is done in Long so a large page number cannot overflow Int
  // and wrap into a negative hit count. Non-positive results are clamped to 0,
  // which Lucene rejects just as it rejected the unclamped value before.
  val offset = page.toLong * pageSize
  val wanted = math.max(0L, math.min(MaxHits.toLong, offset + pageSize)).toInt
  val skipped = math.max(0L, math.min(MaxHits.toLong, offset)).toInt

  // Parses a newline-separated context blob. A line that does not have the
  // expected shape is logged and skipped rather than failing the whole search.
  def parseContext(context : String) : List[ChatLine] =
    context.split('\n').toList.filterNot(_.isEmpty).flatMap {
      case ContextLineRe(ts, who, text) =>
        Some(new ChatLine(who, ts.toLong, text))
      case malformed =>
        logger.warn("Skipping malformed context line: {}", malformed)
        None
    }

  val searcherMgr = getSearcherMgr(indexDir)
  searcherMgr.maybeRefresh
  val indexSearcher = searcherMgr.acquire()
  try {
    val topDocs = indexSearcher.search(query, wanted,
      new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
    val docs = topDocs.scoreDocs
      .drop(skipped)
      .map { sd =>
        // NOTE(review): when searching with a Sort, Lucene 4.x appears to leave
        // ScoreDoc.score unset (NaN) unless doc scores are explicitly requested;
        // confirm before relying on this value.
        val score = sd.score
        val stored = indexSearcher.doc(sd.doc).getFields
          .map(field => field.name -> field.stringValue)
          .toMap
        val List(user, timestamp, message, contextBefore, contextAfter) = DocFields.map(stored)
        val chatLine = new ChatLine(user, timestamp.toLong, message,
          parseContext(contextBefore), parseContext(contextAfter))
        (chatLine, score)
      }
    (topDocs.totalHits, docs.toList)
  } finally {
    // Always hand the searcher back, even when the search or the mapping throws.
    searcherMgr.release(indexSearcher)
  }
}
}