Browse Source

Indexing performance improvements

Abhinav Sarkar 7 years ago
parent
commit
3822bd6017

+ 23
- 20
src/main/scala/net/abhinavsarkar/ircsearch/Server.scala View File

@@ -55,21 +55,24 @@ object Server extends App with Logging {
55 55
         }})
56 56
       .localAddress(new InetSocketAddress(port))
57 57
 
58
+    val cleanup = { () =>
59
+      stopServer(server)
60
+      Indexer.stop
61
+      Searcher.close
62
+    }
63
+
58 64
     Runtime.getRuntime.addShutdownHook(
59 65
       new Thread("ShutdownHook") {
60
-        override def run {
61
-          stopServer(server)
62
-          UnifiedHandler.stop
63
-        }
66
+        override def run = cleanup()
64 67
       })
65 68
 
66 69
     try {
70
+      Indexer.start
67 71
       server.bind().sync.channel.closeFuture.sync
68 72
     } catch {
69 73
       case e : Exception => {
70 74
         logger.error("Exception while running server. Stopping server", e)
71
-        stopServer(server)
72
-        UnifiedHandler.stop
75
+        cleanup()
73 76
       }
74 77
     }
75 78
   }
@@ -85,21 +88,17 @@ object Server extends App with Logging {
85 88
 @Sharable
86 89
 object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
87 90
 
88
-  lazy val indexer = { val indexer = new Indexer; indexer.start; indexer }
89
-
90 91
   val httpRequestRouter = new HttpRequestRouter {
91 92
     val Echo = "^/echo$".r
92 93
     val Index = "^/index$".r
93 94
     val Search = "^/search.*".r
94 95
     def route = {
95 96
       case Echo() => EchoHandler
96
-      case Index() => new IndexHandler(indexer)
97
+      case Index() => new IndexHandler
97 98
       case Search() => SearchHandler
98 99
     }
99 100
   }
100 101
 
101
-  def stop = indexer.stop
102
-
103 102
   override def inboundBufferUpdated(ctx : ChannelHandlerContext, in: ByteBuf) {
104 103
     if (in.readableBytes() < 5) {
105 104
       return;
@@ -119,7 +118,7 @@ object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
119 118
       ctx.pipeline
120 119
         .addLast("framedecoder", new DelimiterBasedFrameDecoder(1048576, Delimiters.lineDelimiter() : _*))
121 120
         .addLast("decoder", new StringDecoder(Charset.forName("UTF-8")))
122
-        .addLast("csvhandler", new TcpIndexHandler(indexer))
121
+        .addLast("csvhandler", new TcpIndexHandler)
123 122
         .remove(this)
124 123
     }
125 124
     ctx.nextInboundByteBuffer.writeBytes(in)
@@ -140,7 +139,7 @@ object UnifiedHandler extends ChannelInboundByteHandlerAdapter {
140 139
 
141 140
 }
142 141
 
143
-class TcpIndexHandler(indexer: Indexer) extends ChannelInboundMessageHandlerAdapter[String] {
142
+class TcpIndexHandler extends ChannelInboundMessageHandlerAdapter[String] {
144 143
   var server: String = null
145 144
   var channel : String = null
146 145
   var botName : String = null
@@ -156,7 +155,7 @@ class TcpIndexHandler(indexer: Indexer) extends ChannelInboundMessageHandlerAdap
156 155
       botName = values(2)
157 156
       inited = true
158 157
     } else {
159
-      indexer.index(new IndexRequest(server, channel, botName,
158
+      Indexer.index(new IndexRequest(server, channel, botName,
160 159
           List(ChatLine(values(0), values(1).toLong, values(2)))))
161 160
     }
162 161
   }
@@ -171,13 +170,13 @@ object EchoHandler extends HttpRequestHandler {
171 170
 }
172 171
 
173 172
 @Sharable
174
-class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
173
+class IndexHandler extends HttpRequestHandler {
175 174
   implicit val formats = DefaultFormats
176 175
   override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
177 176
     future {
178 177
       val content = request.getContent().toString(Charset.forName("UTF-8"))
179 178
       val indexRequest = Serialization.read[IndexRequest](content)
180
-      indexer.index(indexRequest)
179
+      Indexer.index(indexRequest)
181 180
     }
182 181
     logRequest(ctx, request, sendDefaultResponse(ctx, request))
183 182
   }
@@ -187,7 +186,7 @@ class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
187 186
 object SearchHandler extends HttpRequestHandler {
188 187
   implicit val formats = DefaultFormats
189 188
   override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
190
-    future {
189
+    val f = future {
191 190
       val method = request.getMethod()
192 191
       val searchRequest = if (HttpMethod.POST.equals(method)) {
193 192
         val content = request.getContent().toString(Charset.forName("UTF-8"))
@@ -210,8 +209,12 @@ object SearchHandler extends HttpRequestHandler {
210 209
         throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
211 210
       }
212 211
 
213
-      val searchResult = Searcher.search(searchRequest)
214
-      logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
215
-    } onFailure { case e : Exception => logger.error("Error", e) }
212
+      Searcher.search(searchRequest)
213
+    }
214
+    f onSuccess {
215
+      case searchResult =>
216
+        logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
217
+    }
218
+    f onFailure { case e : Exception => logger.error("Error", e) }
216 219
   }
217 220
 }

+ 96
- 76
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Indexer.scala View File

@@ -7,9 +7,8 @@ import java.util.concurrent.Future
7 7
 import java.util.concurrent.LinkedBlockingQueue
8 8
 import java.util.concurrent.TimeUnit
9 9
 import java.util.concurrent.locks.ReentrantLock
10
-
11 10
 import scala.collection.JavaConversions._
12
-
11
+import scala.collection.mutable
13 12
 import org.apache.lucene.analysis.Analyzer
14 13
 import org.apache.lucene.analysis.core.KeywordAnalyzer
15 14
 import org.apache.lucene.analysis.en.EnglishAnalyzer
@@ -23,54 +22,107 @@ import org.apache.lucene.index.IndexWriter
23 22
 import org.apache.lucene.index.IndexWriterConfig
24 23
 import org.apache.lucene.store.FSDirectory
25 24
 import org.apache.lucene.util.Version
26
-
27 25
 import com.typesafe.scalalogging.slf4j.Logging
28
-
29 26
 import net.abhinavsarkar.ircsearch.model._
27
+import java.util.concurrent.BlockingDeque
28
+import java.util.concurrent.BlockingQueue
30 29
 
31
-class Indexer extends Logging {
30
+object Indexer extends Logging {
32 31
 
33
-  import Indexer._
32
+  val LUCENE_VERSION = Version.LUCENE_43
34 33
 
35
-  private val indexQueue = new LinkedBlockingQueue[IndexRequest](10000)
36
-  private val scheduler = Executors.newSingleThreadScheduledExecutor
34
+  private val indexReqQueue = new LinkedBlockingQueue[IndexRequest](10000)
35
+  private val scheduler = Executors.newScheduledThreadPool(2)
37 36
   private val runLock = new ReentrantLock
38
-  private var runFuture : Future[_] = null
37
+  private var indexingFuture : Future[_] = null
38
+  private var flushFuture : Future[_] = null
39 39
 
40
-  def index(indexRequest : IndexRequest) = indexQueue.put(indexRequest)
40
+  private val indexers = mutable.Map[String, IndexWriter]()
41
+
42
+  private def close {
43
+    for (indexer <- indexers.values)
44
+      indexer.close
45
+    logger.info("Closed Indexer")
46
+  }
47
+
48
+  private def flush {
49
+    for (indexer <- indexers.values)
50
+      indexer.commit
51
+    logger.info("Flushed Indexer")
52
+  }
53
+
54
+  def mkAnalyzer : Analyzer = {
55
+    val defAnalyzer = new StandardAnalyzer(LUCENE_VERSION)
56
+    val fieldAnalyzers = Map(
57
+        ChatLine.USER -> new KeywordAnalyzer,
58
+        ChatLine.MSG -> new EnglishAnalyzer(LUCENE_VERSION))
59
+
60
+    new PerFieldAnalyzerWrapper(defAnalyzer, fieldAnalyzers)
61
+  }
62
+
63
+  private def getIndexWriter(dirPath : String) : IndexWriter = {
64
+    synchronized {
65
+      if (!(indexers contains dirPath)) {
66
+        val indexDir = new File(dirPath)
67
+        if (indexDir.exists) {
68
+          assert(indexDir.isDirectory)
69
+        }
70
+        val indexer = new IndexWriter(FSDirectory.open(indexDir),
71
+            new IndexWriterConfig(LUCENE_VERSION, mkAnalyzer))
72
+        indexers += (dirPath -> indexer)
73
+      }
74
+    }
75
+
76
+    indexers(dirPath)
77
+  }
78
+
79
+  def getIndexDir(server : String, channel : String, botName : String) : String =
80
+    s"index-$server-$channel-$botName"
81
+
82
+  def index(indexRequest : IndexRequest) = indexReqQueue.put(indexRequest)
83
+
84
+  private def doInLock(f : => Unit) {
85
+    try {
86
+      runLock.lock
87
+      f
88
+    } finally {
89
+      runLock.unlock
90
+    }
91
+  }
92
+
93
+  implicit private def funcToRunnable(f : => Unit) : Runnable = new Runnable {
94
+    def run {
95
+      try { f }
96
+      catch {
97
+        case e : Throwable => logger.error("Exception while running", e)
98
+      }
99
+    }}
100
+
101
+  def indexReqStream : Stream[IndexRequest] = Stream.cons(indexReqQueue.take, indexReqStream)
41 102
 
42 103
   def start {
43 104
     logger.info("Starting indexer")
44
-    runFuture = scheduler.scheduleWithFixedDelay(
45
-      new Runnable {
46
-        def run {
47
-          try {
48
-            runLock.lock
49
-            if (indexQueue.isEmpty)
50
-              return
51
-
52
-            val indexReqs = new ArrayList[IndexRequest]
53
-            indexQueue.drainTo(indexReqs)
54
-            doIndex(indexReqs.toList)
55
-          } catch {
56
-            case e : Throwable => logger.error("Exception while running indexer", e)
57
-          } finally {
58
-            runLock.unlock
59
-          }
60
-        }},
61
-      0, 1, TimeUnit.SECONDS)
105
+    indexingFuture = scheduler.submit {
106
+      for (indexReq <- indexReqStream)
107
+        doInLock {
108
+          doIndex(List(indexReq))
109
+        }
110
+    }
111
+    flushFuture = scheduler.scheduleWithFixedDelay(doInLock(flush), 0, 10, TimeUnit.SECONDS)
62 112
   }
63 113
 
64 114
   def stop {
65
-    try {
66
-      runLock.lock
67
-      if (runFuture != null) {
68
-        runFuture.cancel(false)
69
-        runFuture = null
115
+    doInLock {
116
+      if (indexingFuture != null) {
117
+        indexingFuture.cancel(false)
118
+        indexingFuture = null
119
+      }
120
+      if (flushFuture != null) {
121
+        flushFuture.cancel(false)
122
+        flushFuture = null
70 123
       }
124
+      close
71 125
       logger.info("Stopped indexer")
72
-    } finally {
73
-      runLock.unlock
74 126
     }
75 127
   }
76 128
 
@@ -81,49 +133,17 @@ class Indexer extends Logging {
81 133
 
82 134
     for (((server, channel, botName), indexRequestBatch) <- indexRequests) {
83 135
       val indexDir = getIndexDir(server, channel, botName)
84
-      val analyzer = mkAnalyzer
85
-      val indexWriter = mkIndexWriter(indexDir, analyzer)
86
-      try {
87
-        for (indexRequest <- indexRequestBatch;
88
-             chatLine     <- indexRequest.chatLines) {
89
-          val tsField = new LongField(ChatLine.TS, chatLine.timestamp, Field.Store.YES)
90
-          val userField = new StringField(ChatLine.USER, chatLine.user, Field.Store.YES)
91
-          val msgField = new TextField(ChatLine.MSG, chatLine.message, Field.Store.YES)
92
-          indexWriter.addDocument(List(tsField, userField, msgField), analyzer)
93
-          logger.debug("Indexed : [{} {} {}] [{}] {}: {}",
94
-              server, channel, botName, chatLine.timestamp.toString, chatLine.user, chatLine.message)
95
-        }
96
-      } finally {
97
-        indexWriter.close
98
-        analyzer.close
136
+      val indexWriter = getIndexWriter(indexDir)
137
+      for (indexRequest <- indexRequestBatch;
138
+           chatLine     <- indexRequest.chatLines) {
139
+        val tsField = new LongField(ChatLine.TS, chatLine.timestamp, Field.Store.YES)
140
+        val userField = new StringField(ChatLine.USER, chatLine.user, Field.Store.YES)
141
+        val msgField = new TextField(ChatLine.MSG, chatLine.message, Field.Store.YES)
142
+        indexWriter.addDocument(List(tsField, userField, msgField), indexWriter.getAnalyzer)
143
+        logger.debug("Indexed : [{} {} {}] [{}] {}: {}",
144
+            server, channel, botName, chatLine.timestamp.toString, chatLine.user, chatLine.message)
99 145
       }
100 146
     }
101 147
   }
102 148
 
103
-}
104
-
105
-object Indexer {
106
-
107
-  val LUCENE_VERSION = Version.LUCENE_43
108
-
109
-  def mkAnalyzer : Analyzer = {
110
-    val defAnalyzer = new StandardAnalyzer(LUCENE_VERSION)
111
-    val fieldAnalyzers = Map(
112
-        ChatLine.USER -> new KeywordAnalyzer,
113
-        ChatLine.MSG -> new EnglishAnalyzer(LUCENE_VERSION))
114
-
115
-    new PerFieldAnalyzerWrapper(defAnalyzer, fieldAnalyzers)
116
-  }
117
-
118
-  private def mkIndexWriter(dirPath : String, analyzer : Analyzer) : IndexWriter = {
119
-    val indexDir = new File(dirPath)
120
-    if (indexDir.exists) {
121
-      assert(indexDir.isDirectory)
122
-    }
123
-    new IndexWriter(FSDirectory.open(indexDir), new IndexWriterConfig(LUCENE_VERSION, analyzer))
124
-  }
125
-
126
-  def getIndexDir(server : String, channel : String, botName : String) : String =
127
-    s"index-$server-$channel-$botName"
128
-
129 149
 }

+ 12
- 6
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala View File

@@ -33,21 +33,27 @@ object Searcher extends Logging {
33 33
 
34 34
   val MaxHits = 1000
35 35
 
36
-  val readers = mutable.Map[String, SearcherManager]()
36
+  private val searcherMgrs = mutable.Map[String, SearcherManager]()
37 37
 
38
-  private def mkIndexSearcher(dirPath : String) : SearcherManager = {
38
+  def close {
39
+    for (searcherMgr <- searcherMgrs.values)
40
+      searcherMgr.close
41
+    logger.info("Closed Searcher")
42
+  }
43
+
44
+  private def getSearcherMgr(dirPath : String) : SearcherManager = {
39 45
     synchronized {
40
-      if (!(readers contains dirPath)) {
46
+      if (!(searcherMgrs contains dirPath)) {
41 47
         val indexDir = new File(dirPath)
42 48
         assert(indexDir.exists && indexDir.isDirectory)
43 49
 
44 50
         val dir = FSDirectory.open(indexDir)
45
-        readers += (dirPath -> new SearcherManager(dir, new SearcherFactory))
51
+        searcherMgrs += (dirPath -> new SearcherManager(dir, new SearcherFactory))
46 52
 
47 53
       }
48 54
     }
49 55
 
50
-    readers(dirPath)
56
+    searcherMgrs(dirPath)
51 57
   }
52 58
 
53 59
   private def mkQueryParser(analyzer : Analyzer) =
@@ -126,7 +132,7 @@ object Searcher extends Logging {
126 132
 
127 133
   private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
128 134
     : (Int, List[(ChatLine, Float)]) = {
129
-    val searcherMgr = mkIndexSearcher(indexDir)
135
+    val searcherMgr = getSearcherMgr(indexDir)
130 136
     searcherMgr.maybeRefresh
131 137
     val indexSearcher = searcherMgr.acquire()
132 138
     try {

Loading…
Cancel
Save