Added support for pagination, constraining search by timestamp range. Performance improvement in searching

master
Abhinav Sarkar 2013-05-18 18:02:52 +05:30
parent 52966fbd8c
commit 613e916172
3 changed files with 104 additions and 47 deletions

View File

@ -187,22 +187,31 @@ class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
object SearchHandler extends HttpRequestHandler { object SearchHandler extends HttpRequestHandler {
implicit val formats = DefaultFormats implicit val formats = DefaultFormats
override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) { override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
val method = request.getMethod() future {
val searchRequest = if (HttpMethod.POST.equals(method)) { val method = request.getMethod()
val content = request.getContent().toString(Charset.forName("UTF-8")) val searchRequest = if (HttpMethod.POST.equals(method)) {
Serialization.read[SearchRequest](content) val content = request.getContent().toString(Charset.forName("UTF-8"))
} else if (HttpMethod.GET.equals(method)) { Serialization.read[SearchRequest](content)
val params = new QueryStringDecoder(request.getUri).getParameters } else if (HttpMethod.GET.equals(method)) {
val server = params("server")(0) val params = new QueryStringDecoder(request.getUri).getParameters.toMap
val channel = params("channel")(0) val server = params("server")(0)
val botName = params("botName")(0) val channel = params("channel")(0)
val query = params("query")(0) val botName = params("botName")(0)
new SearchRequest(server, channel, botName, query) val query = params("query")(0)
} else { val page = params.get("page").collect({ case l => l.get(0) })
throw new UnsupportedOperationException("HTTP method " + method + " is not supported") val pageSize = params.get("pageSize").collect({ case l => l.get(0) })
} var sr = new SearchRequest(server, channel, botName, query)
if (page.isDefined)
sr = sr.copy(page = page.get.toInt)
if (pageSize.isDefined)
sr = sr.copy(pageSize = pageSize.get.toInt)
sr
} else {
throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
}
val searchResult = Searcher.search(searchRequest) val searchResult = Searcher.search(searchRequest)
logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult))) logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
} onFailure { case e : Exception => logger.error("Error", e) }
} }
} }

View File

@ -32,12 +32,12 @@ class Indexer extends Logging {
import Indexer._ import Indexer._
private val indexQueue = new LinkedBlockingQueue[IndexRequest] private val indexQueue = new LinkedBlockingQueue[IndexRequest](10000)
private val scheduler = Executors.newSingleThreadScheduledExecutor private val scheduler = Executors.newSingleThreadScheduledExecutor
private val runLock = new ReentrantLock private val runLock = new ReentrantLock
private var runFuture : Future[_] = null private var runFuture : Future[_] = null
def index(indexRequest : IndexRequest) = indexQueue.offer(indexRequest) def index(indexRequest : IndexRequest) = indexQueue.put(indexRequest)
def start { def start {
logger.info("Starting indexer") logger.info("Starting indexer")

View File

@ -1,19 +1,25 @@
package net.abhinavsarkar.ircsearch.lucene package net.abhinavsarkar.ircsearch.lucene
import java.io.File import java.io.File
import java.text.ParseException
import java.text.SimpleDateFormat
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.collection.mutable import scala.collection.mutable
import scala.collection.mutable.Buffer
import org.apache.lucene.analysis.Analyzer import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.index.IndexReader import org.apache.lucene.queries.ChainedFilter
import org.apache.lucene.queryparser.classic.QueryParser import org.apache.lucene.queryparser.classic.QueryParser
import org.apache.lucene.search.BooleanClause import org.apache.lucene.search.BooleanClause
import org.apache.lucene.search.BooleanQuery import org.apache.lucene.search.BooleanQuery
import org.apache.lucene.search.Filter
import org.apache.lucene.search.FilteredQuery import org.apache.lucene.search.FilteredQuery
import org.apache.lucene.search.IndexSearcher import org.apache.lucene.search.NumericRangeFilter
import org.apache.lucene.search.Query import org.apache.lucene.search.Query
import org.apache.lucene.search.QueryWrapperFilter import org.apache.lucene.search.QueryWrapperFilter
import org.apache.lucene.search.SearcherFactory
import org.apache.lucene.search.SearcherManager
import org.apache.lucene.search.Sort import org.apache.lucene.search.Sort
import org.apache.lucene.search.SortField import org.apache.lucene.search.SortField
import org.apache.lucene.search.TermQuery import org.apache.lucene.search.TermQuery
@ -25,41 +31,75 @@ import net.abhinavsarkar.ircsearch.model._
object Searcher extends Logging { object Searcher extends Logging {
private def mkIndexSearcher(dirPath : String) : IndexSearcher = { val MaxHits = 1000
val indexDir = new File(dirPath)
assert(indexDir.exists && indexDir.isDirectory)
new IndexSearcher(IndexReader.open(FSDirectory.open(indexDir))) val readers = mutable.Map[String, SearcherManager]()
private def mkIndexSearcher(dirPath : String) : SearcherManager = {
synchronized {
if (!(readers contains dirPath)) {
val indexDir = new File(dirPath)
assert(indexDir.exists && indexDir.isDirectory)
val dir = FSDirectory.open(indexDir)
readers += (dirPath -> new SearcherManager(dir, new SearcherFactory))
}
}
readers(dirPath)
} }
private def mkQueryParser(analyzer : Analyzer) = private def mkQueryParser(analyzer : Analyzer) =
new QueryParser(Indexer.LUCENE_VERSION, ChatLine.MSG, analyzer) new QueryParser(Indexer.LUCENE_VERSION, ChatLine.MSG, analyzer)
private def filterifyQuery(query : Query, mustFields : Set[String]) : Query = private def filterifyQuery(query : Query) : Query =
query match { query match {
case boolQuery: BooleanQuery => { case boolQuery: BooleanQuery => {
val newQuery = new BooleanQuery val newQuery = new BooleanQuery
val filterQuery = new BooleanQuery val filters = Buffer[Filter]()
for (clause <- boolQuery.getClauses) { for (clause <- boolQuery.getClauses) {
val subQuery = clause.getQuery val subQuery = clause.getQuery
if (subQuery.isInstanceOf[TermQuery]) { if (subQuery.isInstanceOf[TermQuery]) {
val termQuery = subQuery.asInstanceOf[TermQuery] val termQuery = subQuery.asInstanceOf[TermQuery]
val field = termQuery.getTerm.field val field = termQuery.getTerm.field
if (mustFields contains field) { val sdf = new SimpleDateFormat("yyMMdd")
clause.setOccur(BooleanClause.Occur.MUST) field match {
filterQuery.add(clause) case ChatLine.USER => {
} else { val filterQuery = new BooleanQuery
newQuery.add(clause) clause.setOccur(BooleanClause.Occur.MUST)
filterQuery.add(clause)
filters += new QueryWrapperFilter(filterQuery)
}
case "before" => {
try {
val ts = sdf.parse(termQuery.getTerm.text).getTime
filters += NumericRangeFilter.newLongRange(
ChatLine.TS, 0, ts, true, true)
} catch {
case e : ParseException => {}
}
}
case "after" => {
try {
val ts = sdf.parse(termQuery.getTerm.text).getTime
filters += NumericRangeFilter.newLongRange(
ChatLine.TS, ts, java.lang.Long.MAX_VALUE, true, true)
} catch {
case e : ParseException => {}
}
}
case _ => newQuery.add(clause)
} }
} else { } else {
newQuery.add(clause) newQuery.add(clause)
} }
} }
if (filterQuery.clauses.isEmpty) if (filters.isEmpty)
newQuery newQuery
else else
new FilteredQuery(newQuery, new QueryWrapperFilter(filterQuery)) new FilteredQuery(newQuery, new ChainedFilter(filters.toArray, ChainedFilter.AND))
} }
case _ => query case _ => query
} }
@ -72,9 +112,9 @@ object Searcher extends Logging {
val analyzer = Indexer.mkAnalyzer val analyzer = Indexer.mkAnalyzer
try { try {
val queryParser = mkQueryParser(analyzer) val queryParser = mkQueryParser(analyzer)
val query = filterifyQuery(queryParser.parse(searchRequest.query), Set(ChatLine.USER)) val query = filterifyQuery(queryParser.parse(searchRequest.query))
logger.debug("Query: {}", query) logger.debug("Query: {}", query)
val (totalResults, results) = doSearch(indexDir, query, searchRequest.pageSize) val (totalResults, results) = doSearch(indexDir, query, searchRequest.page, searchRequest.pageSize)
val searchResults = SearchResult.fromSearchRequest(searchRequest) val searchResults = SearchResult.fromSearchRequest(searchRequest)
.copy(totalResults = totalResults, chatLines = results.map(_._1)) .copy(totalResults = totalResults, chatLines = results.map(_._1))
logger.debug("Search results: {}", searchResults) logger.debug("Search results: {}", searchResults)
@ -84,21 +124,29 @@ object Searcher extends Logging {
} }
} }
private def doSearch(indexDir : String, query : Query, maxHits : Int) private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
: (Int, List[(ChatLine, Float)]) = { : (Int, List[(ChatLine, Float)]) = {
val indexSearcher = mkIndexSearcher(indexDir) val searcherMgr = mkIndexSearcher(indexDir)
val topDocs = indexSearcher.search(query, maxHits, searcherMgr.maybeRefresh
new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true))) val indexSearcher = searcherMgr.acquire()
val docs = topDocs.scoreDocs.map { sd => try {
val score = sd.score val topDocs = indexSearcher.search(query, MaxHits.min((page + 1) * pageSize),
val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) { new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
(map, field) => map += (field.name -> field.stringValue) val docs = topDocs.scoreDocs
} .drop(page * pageSize)
.map { sd =>
val score = sd.score
val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
(map, field) => map += (field.name -> field.stringValue)
}
val chatLine = new ChatLine(doc(ChatLine.USER), doc(ChatLine.TS).toLong, doc(ChatLine.MSG)) val chatLine = new ChatLine(doc(ChatLine.USER), doc(ChatLine.TS).toLong, doc(ChatLine.MSG))
(chatLine, score) (chatLine, score)
}
(topDocs.totalHits, docs.toList)
} finally {
searcherMgr.release(indexSearcher)
} }
(topDocs.totalHits, docs.toList)
} }
} }