Added support for pagination and for constraining search by timestamp range. Performance improvement in searching.
parent 52966fbd8c
commit 613e916172
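
Illustration of the new request surface (a hedged sketch: the /search path, host, and all parameter values are hypothetical; only the parameter names and the before:/after: term syntax come from this commit):

    // Scala sketch of a paginated, time-bounded search request URL.
    // page/pageSize are optional; before:/after: take "yyMMdd" dates.
    val url = "http://localhost:8080/search" +
      "?server=irc.freenode.net&channel=%23scala&botName=searchbot" +
      "&query=monads+after:121201+before:130101" +
      "&page=2&pageSize=25"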
@@ -187,22 +187,31 @@ class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
 object SearchHandler extends HttpRequestHandler {
   implicit val formats = DefaultFormats
   override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
-    val method = request.getMethod()
-    val searchRequest = if (HttpMethod.POST.equals(method)) {
-      val content = request.getContent().toString(Charset.forName("UTF-8"))
-      Serialization.read[SearchRequest](content)
-    } else if (HttpMethod.GET.equals(method)) {
-      val params = new QueryStringDecoder(request.getUri).getParameters
-      val server = params("server")(0)
-      val channel = params("channel")(0)
-      val botName = params("botName")(0)
-      val query = params("query")(0)
-      new SearchRequest(server, channel, botName, query)
-    } else {
-      throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
-    }
+    future {
+      val method = request.getMethod()
+      val searchRequest = if (HttpMethod.POST.equals(method)) {
+        val content = request.getContent().toString(Charset.forName("UTF-8"))
+        Serialization.read[SearchRequest](content)
+      } else if (HttpMethod.GET.equals(method)) {
+        val params = new QueryStringDecoder(request.getUri).getParameters.toMap
+        val server = params("server")(0)
+        val channel = params("channel")(0)
+        val botName = params("botName")(0)
+        val query = params("query")(0)
+        val page = params.get("page").collect({ case l => l.get(0) })
+        val pageSize = params.get("pageSize").collect({ case l => l.get(0) })
+        var sr = new SearchRequest(server, channel, botName, query)
+        if (page.isDefined)
+          sr = sr.copy(page = page.get.toInt)
+        if (pageSize.isDefined)
+          sr = sr.copy(pageSize = pageSize.get.toInt)
+        sr
+      } else {
+        throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
+      }
 
-    val searchResult = Searcher.search(searchRequest)
-    logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
+      val searchResult = Searcher.search(searchRequest)
+      logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
+    } onFailure { case e : Exception => logger.error("Error", e) }
   }
 }
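
The optional-parameter folding above, as a self-contained sketch (all values hypothetical; SearchRequest and its page/pageSize copy fields are from the diff):

    val params = Map("page" -> Seq("2"), "pageSize" -> Seq("25"))
    var sr = new SearchRequest("irc.example.net", "#chan", "bot", "hello")
    for (p <- params.get("page")) sr = sr.copy(page = p.head.toInt)
    for (ps <- params.get("pageSize")) sr = sr.copy(pageSize = ps.head.toInt)
    // Absent parameters leave the SearchRequest defaults untouched.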
@@ -32,12 +32,12 @@ class Indexer extends Logging {
 
   import Indexer._
 
-  private val indexQueue = new LinkedBlockingQueue[IndexRequest]
+  private val indexQueue = new LinkedBlockingQueue[IndexRequest](10000)
   private val scheduler = Executors.newSingleThreadScheduledExecutor
   private val runLock = new ReentrantLock
   private var runFuture : Future[_] = null
 
-  def index(indexRequest : IndexRequest) = indexQueue.offer(indexRequest)
+  def index(indexRequest : IndexRequest) = indexQueue.put(indexRequest)
 
   def start {
     logger.info("Starting indexer")
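
The queue change is behavioral as well as sizing: the old queue was unbounded, so offer could never fail; with the new 10000-entry bound, put makes producers block rather than drop requests when the indexer falls behind. A standalone sketch with a toy capacity:

    import java.util.concurrent.LinkedBlockingQueue

    val q = new LinkedBlockingQueue[String](2) // toy bound; indexer uses 10000
    q.put("a"); q.put("b")
    assert(!q.offer("c")) // full: offer fails fast and the element is lost
    // q.put("c") would instead block here until a consumer takes an element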
@@ -1,19 +1,25 @@
 package net.abhinavsarkar.ircsearch.lucene
 
 import java.io.File
+import java.text.ParseException
+import java.text.SimpleDateFormat
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable
+import scala.collection.mutable.Buffer
 
 import org.apache.lucene.analysis.Analyzer
-import org.apache.lucene.index.IndexReader
+import org.apache.lucene.queries.ChainedFilter
 import org.apache.lucene.queryparser.classic.QueryParser
 import org.apache.lucene.search.BooleanClause
 import org.apache.lucene.search.BooleanQuery
+import org.apache.lucene.search.Filter
 import org.apache.lucene.search.FilteredQuery
-import org.apache.lucene.search.IndexSearcher
+import org.apache.lucene.search.NumericRangeFilter
 import org.apache.lucene.search.Query
 import org.apache.lucene.search.QueryWrapperFilter
+import org.apache.lucene.search.SearcherFactory
+import org.apache.lucene.search.SearcherManager
 import org.apache.lucene.search.Sort
 import org.apache.lucene.search.SortField
 import org.apache.lucene.search.TermQuery
@@ -25,41 +31,75 @@ import net.abhinavsarkar.ircsearch.model._
 
 object Searcher extends Logging {
 
-  private def mkIndexSearcher(dirPath : String) : IndexSearcher = {
-    val indexDir = new File(dirPath)
-    assert(indexDir.exists && indexDir.isDirectory)
+  val MaxHits = 1000
 
-    new IndexSearcher(IndexReader.open(FSDirectory.open(indexDir)))
+  val readers = mutable.Map[String, SearcherManager]()
+
+  private def mkIndexSearcher(dirPath : String) : SearcherManager = {
+    synchronized {
+      if (!(readers contains dirPath)) {
+        val indexDir = new File(dirPath)
+        assert(indexDir.exists && indexDir.isDirectory)
+
+        val dir = FSDirectory.open(indexDir)
+        readers += (dirPath -> new SearcherManager(dir, new SearcherFactory))
+      }
+    }
+
+    readers(dirPath)
   }
 
   private def mkQueryParser(analyzer : Analyzer) =
     new QueryParser(Indexer.LUCENE_VERSION, ChatLine.MSG, analyzer)
 
-  private def filterifyQuery(query : Query, mustFields : Set[String]) : Query =
+  private def filterifyQuery(query : Query) : Query =
     query match {
       case boolQuery: BooleanQuery => {
         val newQuery = new BooleanQuery
-        val filterQuery = new BooleanQuery
+        val filters = Buffer[Filter]()
         for (clause <- boolQuery.getClauses) {
           val subQuery = clause.getQuery
           if (subQuery.isInstanceOf[TermQuery]) {
             val termQuery = subQuery.asInstanceOf[TermQuery]
             val field = termQuery.getTerm.field
-            if (mustFields contains field) {
-              clause.setOccur(BooleanClause.Occur.MUST)
-              filterQuery.add(clause)
-            } else {
-              newQuery.add(clause)
+            val sdf = new SimpleDateFormat("yyMMdd")
+            field match {
+              case ChatLine.USER => {
+                val filterQuery = new BooleanQuery
+                clause.setOccur(BooleanClause.Occur.MUST)
+                filterQuery.add(clause)
+                filters += new QueryWrapperFilter(filterQuery)
+              }
+              case "before" => {
+                try {
+                  val ts = sdf.parse(termQuery.getTerm.text).getTime
+                  filters += NumericRangeFilter.newLongRange(
+                    ChatLine.TS, 0, ts, true, true)
+                } catch {
+                  case e : ParseException => {}
+                }
+              }
+              case "after" => {
+                try {
+                  val ts = sdf.parse(termQuery.getTerm.text).getTime
+                  filters += NumericRangeFilter.newLongRange(
+                    ChatLine.TS, ts, java.lang.Long.MAX_VALUE, true, true)
+                } catch {
+                  case e : ParseException => {}
+                }
+              }
+              case _ => newQuery.add(clause)
             }
           } else {
             newQuery.add(clause)
           }
         }
 
-        if (filterQuery.clauses.isEmpty)
+        if (filters.isEmpty)
           newQuery
         else
-          new FilteredQuery(newQuery, new QueryWrapperFilter(filterQuery))
+          new FilteredQuery(newQuery, new ChainedFilter(filters.toArray, ChainedFilter.AND))
       }
       case _ => query
     }
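
How the new before/after terms behave, as a minimal sketch (the date value is hypothetical): filterifyQuery parses the term text as a "yyMMdd" date and turns it into a numeric range filter on the ChatLine.TS millisecond timestamp; unparseable dates add no filter, since the ParseException is swallowed:

    import java.text.SimpleDateFormat

    val sdf = new SimpleDateFormat("yyMMdd")
    val ts = sdf.parse("130101").getTime // millis for 2013-01-01, local time
    // "after:130101"  -> constrains TS to [ts, Long.MaxValue]
    // "before:130101" -> constrains TS to [0, ts]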
@@ -72,9 +112,9 @@ object Searcher extends Logging {
     val analyzer = Indexer.mkAnalyzer
     try {
       val queryParser = mkQueryParser(analyzer)
-      val query = filterifyQuery(queryParser.parse(searchRequest.query), Set(ChatLine.USER))
+      val query = filterifyQuery(queryParser.parse(searchRequest.query))
       logger.debug("Query: {}", query)
-      val (totalResults, results) = doSearch(indexDir, query, searchRequest.pageSize)
+      val (totalResults, results) = doSearch(indexDir, query, searchRequest.page, searchRequest.pageSize)
       val searchResults = SearchResult.fromSearchRequest(searchRequest)
         .copy(totalResults = totalResults, chatLines = results.map(_._1))
       logger.debug("Search results: {}", searchResults)
@@ -84,21 +124,29 @@ object Searcher extends Logging {
     }
   }
 
-  private def doSearch(indexDir : String, query : Query, maxHits : Int)
+  private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
     : (Int, List[(ChatLine, Float)]) = {
-    val indexSearcher = mkIndexSearcher(indexDir)
-    val topDocs = indexSearcher.search(query, maxHits,
-      new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
-    val docs = topDocs.scoreDocs.map { sd =>
-      val score = sd.score
-      val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
-        (map, field) => map += (field.name -> field.stringValue)
-      }
+    val searcherMgr = mkIndexSearcher(indexDir)
+    searcherMgr.maybeRefresh
+    val indexSearcher = searcherMgr.acquire()
+    try {
+      val topDocs = indexSearcher.search(query, MaxHits.min((page + 1) * pageSize),
+        new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
+      val docs = topDocs.scoreDocs
+        .drop(page * pageSize)
+        .map { sd =>
+          val score = sd.score
+          val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
+            (map, field) => map += (field.name -> field.stringValue)
+          }
 
           val chatLine = new ChatLine(doc(ChatLine.USER), doc(ChatLine.TS).toLong, doc(ChatLine.MSG))
           (chatLine, score)
+        }
+      (topDocs.totalHits, docs.toList)
+    } finally {
+      searcherMgr.release(indexSearcher)
     }
-    (topDocs.totalHits, docs.toList)
   }
 
 }
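
The paging arithmetic in doSearch, worked through with hypothetical numbers: the searcher fetches the top (page + 1) * pageSize hits, capped at MaxHits, then drops the pages before the requested one:

    val (page, pageSize) = (2, 10) // hypothetical values
    val fetch = 1000.min((page + 1) * pageSize) // MaxHits = 1000; fetch = 30
    val kept = (1 to fetch).drop(page * pageSize) // hits 21..30 are returned
    // Pages past MaxHits / pageSize necessarily come back empty.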