Indexing performance improvements

master
Abhinav Sarkar 2013-05-19 11:32:20 +05:30
parent 613e916172
commit 3822bd6017
3 changed files with 139 additions and 110 deletions

View File

@ -55,21 +55,24 @@ object Server extends App with Logging {
}}) }})
.localAddress(new InetSocketAddress(port)) .localAddress(new InetSocketAddress(port))
val cleanup = { () =>
stopServer(server)
Indexer.stop
Searcher.close
}
Runtime.getRuntime.addShutdownHook( Runtime.getRuntime.addShutdownHook(
new Thread("ShutdownHook") { new Thread("ShutdownHook") {
override def run { override def run = cleanup()
stopServer(server)
UnifiedHandler.stop
}
}) })
try { try {
Indexer.start
server.bind().sync.channel.closeFuture.sync server.bind().sync.channel.closeFuture.sync
} catch { } catch {
case e : Exception => { case e : Exception => {
logger.error("Exception while running server. Stopping server", e) logger.error("Exception while running server. Stopping server", e)
stopServer(server) cleanup()
UnifiedHandler.stop
} }
} }
} }
@ -85,21 +88,17 @@ object Server extends App with Logging {
@Sharable @Sharable
object UnifiedHandler extends ChannelInboundByteHandlerAdapter { object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
lazy val indexer = { val indexer = new Indexer; indexer.start; indexer }
val httpRequestRouter = new HttpRequestRouter { val httpRequestRouter = new HttpRequestRouter {
val Echo = "^/echo$".r val Echo = "^/echo$".r
val Index = "^/index$".r val Index = "^/index$".r
val Search = "^/search.*".r val Search = "^/search.*".r
def route = { def route = {
case Echo() => EchoHandler case Echo() => EchoHandler
case Index() => new IndexHandler(indexer) case Index() => new IndexHandler
case Search() => SearchHandler case Search() => SearchHandler
} }
} }
def stop = indexer.stop
override def inboundBufferUpdated(ctx : ChannelHandlerContext, in: ByteBuf) { override def inboundBufferUpdated(ctx : ChannelHandlerContext, in: ByteBuf) {
if (in.readableBytes() < 5) { if (in.readableBytes() < 5) {
return; return;
@ -119,7 +118,7 @@ object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
ctx.pipeline ctx.pipeline
.addLast("framedecoder", new DelimiterBasedFrameDecoder(1048576, Delimiters.lineDelimiter() : _*)) .addLast("framedecoder", new DelimiterBasedFrameDecoder(1048576, Delimiters.lineDelimiter() : _*))
.addLast("decoder", new StringDecoder(Charset.forName("UTF-8"))) .addLast("decoder", new StringDecoder(Charset.forName("UTF-8")))
.addLast("csvhandler", new TcpIndexHandler(indexer)) .addLast("csvhandler", new TcpIndexHandler)
.remove(this) .remove(this)
} }
ctx.nextInboundByteBuffer.writeBytes(in) ctx.nextInboundByteBuffer.writeBytes(in)
@ -140,7 +139,7 @@ object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
} }
class TcpIndexHandler(indexer: Indexer) extends ChannelInboundMessageHandlerAdapter[String] { class TcpIndexHandler extends ChannelInboundMessageHandlerAdapter[String] {
var server: String = null var server: String = null
var channel : String = null var channel : String = null
var botName : String = null var botName : String = null
@ -156,7 +155,7 @@ class TcpIndexHandler(indexer: Indexer) extends ChannelInboundMessageHandlerAdap
botName = values(2) botName = values(2)
inited = true inited = true
} else { } else {
indexer.index(new IndexRequest(server, channel, botName, Indexer.index(new IndexRequest(server, channel, botName,
List(ChatLine(values(0), values(1).toLong, values(2))))) List(ChatLine(values(0), values(1).toLong, values(2)))))
} }
} }
@ -171,13 +170,13 @@ object EchoHandler extends HttpRequestHandler {
} }
@Sharable @Sharable
class IndexHandler(indexer: Indexer) extends HttpRequestHandler { class IndexHandler extends HttpRequestHandler {
implicit val formats = DefaultFormats implicit val formats = DefaultFormats
override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) { override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
future { future {
val content = request.getContent().toString(Charset.forName("UTF-8")) val content = request.getContent().toString(Charset.forName("UTF-8"))
val indexRequest = Serialization.read[IndexRequest](content) val indexRequest = Serialization.read[IndexRequest](content)
indexer.index(indexRequest) Indexer.index(indexRequest)
} }
logRequest(ctx, request, sendDefaultResponse(ctx, request)) logRequest(ctx, request, sendDefaultResponse(ctx, request))
} }
@ -187,7 +186,7 @@ class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
object SearchHandler extends HttpRequestHandler { object SearchHandler extends HttpRequestHandler {
implicit val formats = DefaultFormats implicit val formats = DefaultFormats
override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) { override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
future { val f = future {
val method = request.getMethod() val method = request.getMethod()
val searchRequest = if (HttpMethod.POST.equals(method)) { val searchRequest = if (HttpMethod.POST.equals(method)) {
val content = request.getContent().toString(Charset.forName("UTF-8")) val content = request.getContent().toString(Charset.forName("UTF-8"))
@ -210,8 +209,12 @@ object SearchHandler extends HttpRequestHandler {
throw new UnsupportedOperationException("HTTP method " + method + " is not supported") throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
} }
val searchResult = Searcher.search(searchRequest) Searcher.search(searchRequest)
logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult))) }
} onFailure { case e : Exception => logger.error("Error", e) } f onSuccess {
case searchResult =>
logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
}
f onFailure { case e : Exception => logger.error("Error", e) }
} }
} }

View File

@ -7,9 +7,8 @@ import java.util.concurrent.Future
import java.util.concurrent.LinkedBlockingQueue import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit
import java.util.concurrent.locks.ReentrantLock import java.util.concurrent.locks.ReentrantLock
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.collection.mutable
import org.apache.lucene.analysis.Analyzer import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.core.KeywordAnalyzer import org.apache.lucene.analysis.core.KeywordAnalyzer
import org.apache.lucene.analysis.en.EnglishAnalyzer import org.apache.lucene.analysis.en.EnglishAnalyzer
@ -23,89 +22,35 @@ import org.apache.lucene.index.IndexWriter
import org.apache.lucene.index.IndexWriterConfig import org.apache.lucene.index.IndexWriterConfig
import org.apache.lucene.store.FSDirectory import org.apache.lucene.store.FSDirectory
import org.apache.lucene.util.Version import org.apache.lucene.util.Version
import com.typesafe.scalalogging.slf4j.Logging import com.typesafe.scalalogging.slf4j.Logging
import net.abhinavsarkar.ircsearch.model._ import net.abhinavsarkar.ircsearch.model._
import java.util.concurrent.BlockingDeque
import java.util.concurrent.BlockingQueue
class Indexer extends Logging { object Indexer extends Logging {
import Indexer._
private val indexQueue = new LinkedBlockingQueue[IndexRequest](10000)
private val scheduler = Executors.newSingleThreadScheduledExecutor
private val runLock = new ReentrantLock
private var runFuture : Future[_] = null
def index(indexRequest : IndexRequest) = indexQueue.put(indexRequest)
def start {
logger.info("Starting indexer")
runFuture = scheduler.scheduleWithFixedDelay(
new Runnable {
def run {
try {
runLock.lock
if (indexQueue.isEmpty)
return
val indexReqs = new ArrayList[IndexRequest]
indexQueue.drainTo(indexReqs)
doIndex(indexReqs.toList)
} catch {
case e : Throwable => logger.error("Exception while running indexer", e)
} finally {
runLock.unlock
}
}},
0, 1, TimeUnit.SECONDS)
}
def stop {
try {
runLock.lock
if (runFuture != null) {
runFuture.cancel(false)
runFuture = null
}
logger.info("Stopped indexer")
} finally {
runLock.unlock
}
}
private def doIndex(indexReqs: List[IndexRequest]) {
val indexRequests = indexReqs.groupBy { r =>
(r.server, r.channel, r.botName)
}
for (((server, channel, botName), indexRequestBatch) <- indexRequests) {
val indexDir = getIndexDir(server, channel, botName)
val analyzer = mkAnalyzer
val indexWriter = mkIndexWriter(indexDir, analyzer)
try {
for (indexRequest <- indexRequestBatch;
chatLine <- indexRequest.chatLines) {
val tsField = new LongField(ChatLine.TS, chatLine.timestamp, Field.Store.YES)
val userField = new StringField(ChatLine.USER, chatLine.user, Field.Store.YES)
val msgField = new TextField(ChatLine.MSG, chatLine.message, Field.Store.YES)
indexWriter.addDocument(List(tsField, userField, msgField), analyzer)
logger.debug("Indexed : [{} {} {}] [{}] {}: {}",
server, channel, botName, chatLine.timestamp.toString, chatLine.user, chatLine.message)
}
} finally {
indexWriter.close
analyzer.close
}
}
}
}
object Indexer {
val LUCENE_VERSION = Version.LUCENE_43 val LUCENE_VERSION = Version.LUCENE_43
private val indexReqQueue = new LinkedBlockingQueue[IndexRequest](10000)
private val scheduler = Executors.newScheduledThreadPool(2)
private val runLock = new ReentrantLock
private var indexingFuture : Future[_] = null
private var flushFuture : Future[_] = null
private val indexers = mutable.Map[String, IndexWriter]()
private def close {
for (indexer <- indexers.values)
indexer.close
logger.info("Closed Indexer")
}
private def flush {
for (indexer <- indexers.values)
indexer.commit
logger.info("Flushed Indexer")
}
def mkAnalyzer : Analyzer = { def mkAnalyzer : Analyzer = {
val defAnalyzer = new StandardAnalyzer(LUCENE_VERSION) val defAnalyzer = new StandardAnalyzer(LUCENE_VERSION)
val fieldAnalyzers = Map( val fieldAnalyzers = Map(
@ -115,15 +60,90 @@ object Indexer {
new PerFieldAnalyzerWrapper(defAnalyzer, fieldAnalyzers) new PerFieldAnalyzerWrapper(defAnalyzer, fieldAnalyzers)
} }
private def mkIndexWriter(dirPath : String, analyzer : Analyzer) : IndexWriter = { private def getIndexWriter(dirPath : String) : IndexWriter = {
val indexDir = new File(dirPath) synchronized {
if (indexDir.exists) { if (!(indexers contains dirPath)) {
assert(indexDir.isDirectory) val indexDir = new File(dirPath)
if (indexDir.exists) {
assert(indexDir.isDirectory)
}
val indexer = new IndexWriter(FSDirectory.open(indexDir),
new IndexWriterConfig(LUCENE_VERSION, mkAnalyzer))
indexers += (dirPath -> indexer)
}
} }
new IndexWriter(FSDirectory.open(indexDir), new IndexWriterConfig(LUCENE_VERSION, analyzer))
indexers(dirPath)
} }
def getIndexDir(server : String, channel : String, botName : String) : String = def getIndexDir(server : String, channel : String, botName : String) : String =
s"index-$server-$channel-$botName" s"index-$server-$channel-$botName"
def index(indexRequest : IndexRequest) = indexReqQueue.put(indexRequest)
private def doInLock(f : => Unit) {
try {
runLock.lock
f
} finally {
runLock.unlock
}
}
implicit private def funcToRunnable(f : => Unit) : Runnable = new Runnable {
def run {
try { f }
catch {
case e : Throwable => logger.error("Exception while running", e)
}
}}
def indexReqStream : Stream[IndexRequest] = Stream.cons(indexReqQueue.take, indexReqStream)
def start {
logger.info("Starting indexer")
indexingFuture = scheduler.submit {
for (indexReq <- indexReqStream)
doInLock {
doIndex(List(indexReq))
}
}
flushFuture = scheduler.scheduleWithFixedDelay(doInLock(flush), 0, 10, TimeUnit.SECONDS)
}
def stop {
doInLock {
if (indexingFuture != null) {
indexingFuture.cancel(false)
indexingFuture = null
}
if (flushFuture != null) {
flushFuture.cancel(false)
flushFuture = null
}
close
logger.info("Stopped indexer")
}
}
private def doIndex(indexReqs: List[IndexRequest]) {
val indexRequests = indexReqs.groupBy { r =>
(r.server, r.channel, r.botName)
}
for (((server, channel, botName), indexRequestBatch) <- indexRequests) {
val indexDir = getIndexDir(server, channel, botName)
val indexWriter = getIndexWriter(indexDir)
for (indexRequest <- indexRequestBatch;
chatLine <- indexRequest.chatLines) {
val tsField = new LongField(ChatLine.TS, chatLine.timestamp, Field.Store.YES)
val userField = new StringField(ChatLine.USER, chatLine.user, Field.Store.YES)
val msgField = new TextField(ChatLine.MSG, chatLine.message, Field.Store.YES)
indexWriter.addDocument(List(tsField, userField, msgField), indexWriter.getAnalyzer)
logger.debug("Indexed : [{} {} {}] [{}] {}: {}",
server, channel, botName, chatLine.timestamp.toString, chatLine.user, chatLine.message)
}
}
}
} }

View File

@ -33,21 +33,27 @@ object Searcher extends Logging {
val MaxHits = 1000 val MaxHits = 1000
val readers = mutable.Map[String, SearcherManager]() private val searcherMgrs = mutable.Map[String, SearcherManager]()
private def mkIndexSearcher(dirPath : String) : SearcherManager = { def close {
for (searcherMgr <- searcherMgrs.values)
searcherMgr.close
logger.info("Closed Searcher")
}
private def getSearcherMgr(dirPath : String) : SearcherManager = {
synchronized { synchronized {
if (!(readers contains dirPath)) { if (!(searcherMgrs contains dirPath)) {
val indexDir = new File(dirPath) val indexDir = new File(dirPath)
assert(indexDir.exists && indexDir.isDirectory) assert(indexDir.exists && indexDir.isDirectory)
val dir = FSDirectory.open(indexDir) val dir = FSDirectory.open(indexDir)
readers += (dirPath -> new SearcherManager(dir, new SearcherFactory)) searcherMgrs += (dirPath -> new SearcherManager(dir, new SearcherFactory))
} }
} }
readers(dirPath) searcherMgrs(dirPath)
} }
private def mkQueryParser(analyzer : Analyzer) = private def mkQueryParser(analyzer : Analyzer) =
@ -126,7 +132,7 @@ object Searcher extends Logging {
private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int) private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
: (Int, List[(ChatLine, Float)]) = { : (Int, List[(ChatLine, Float)]) = {
val searcherMgr = mkIndexSearcher(indexDir) val searcherMgr = getSearcherMgr(indexDir)
searcherMgr.maybeRefresh searcherMgr.maybeRefresh
val indexSearcher = searcherMgr.acquire() val indexSearcher = searcherMgr.acquire()
try { try {