Browse Source

Added rate limiter for indexing

Abhinav Sarkar 7 years ago
parent
commit
f577cd5fbd

+ 8
- 2
pom.xml View File

@@ -11,6 +11,7 @@
11 11
 		<maven.compiler.target>1.6</maven.compiler.target>
12 12
 		<encoding>UTF-8</encoding>
13 13
 		<scala.version>2.10.0</scala.version>
14
+		<scala.majorversion>2.10</scala.majorversion>
14 15
 		<lucene.version>4.3.0</lucene.version>
15 16
 		<project.dependencyDir>${project.build.directory}/dependency</project.dependencyDir>
16 17
 	</properties>
@@ -39,7 +40,7 @@
39 40
 		</dependency>
40 41
 		<dependency>
41 42
 			<groupId>com.typesafe</groupId>
42
-			<artifactId>scalalogging-slf4j_2.10</artifactId>
43
+			<artifactId>scalalogging-slf4j_${scala.majorversion}</artifactId>
43 44
 			<version>1.0.1</version>
44 45
 		</dependency>
45 46
 		<dependency>
@@ -50,7 +51,7 @@
50 51
 		</dependency>
51 52
 		<dependency>
52 53
 			<groupId>net.liftweb</groupId>
53
-			<artifactId>lift-json_2.10</artifactId>
54
+			<artifactId>lift-json_${scala.majorversion}</artifactId>
54 55
 			<version>2.5-RC5</version>
55 56
 		</dependency>
56 57
 		<dependency>
@@ -78,6 +79,11 @@
78 79
 			<artifactId>opencsv</artifactId>
79 80
 			<version>2.3</version>
80 81
 		</dependency>
82
+		<dependency>
83
+			<groupId>com.google.guava</groupId>
84
+			<artifactId>guava</artifactId>
85
+			<version>14.0.1</version>
86
+		</dependency>
81 87
 	</dependencies>
82 88
 
83 89
 	<build>

+ 11
- 4
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Indexer.scala View File

@@ -20,6 +20,7 @@ import org.apache.lucene.index.{ IndexWriter, IndexWriterConfig }
20 20
 import org.apache.lucene.store.FSDirectory
21 21
 import org.apache.lucene.util.Version
22 22
 
23
+import com.google.common.util.concurrent.RateLimiter
23 24
 import com.typesafe.scalalogging.slf4j.Logging
24 25
 
25 26
 import net.abhinavsarkar.ircsearch.model._
@@ -49,10 +50,13 @@ object Indexer extends Logging {
49 50
   val ContextSize = 2
50 51
   val ContextDurationSecs = 20
51 52
   val IndexingDurationSecs = 10
53
+  val FlushDurationSecs = 60
54
+  val RateLimitPerSec = 1000
52 55
 
53
-  private val indexQueue = new PriorityBlockingQueue[IndexRecord](10000)
56
+  private val indexQueue = new PriorityBlockingQueue[IndexRecord]
54 57
   private val scheduler = Executors.newScheduledThreadPool(2)
55 58
   private val runLock = new ReentrantLock
59
+  private val rateLimiter = RateLimiter.create(RateLimitPerSec)
56 60
   private var indexingFuture : Future[_] = null
57 61
   private var flushFuture : Future[_] = null
58 62
 
@@ -101,7 +105,10 @@ object Indexer extends Logging {
101 105
     s"index-$server-$channel-$botName"
102 106
 
103 107
   def index(indexRequest : IndexRequest) =
104
-    IndexRecord.fromIndexRequest(indexRequest).foreach(indexQueue.put)
108
+    IndexRecord.fromIndexRequest(indexRequest).foreach { rec =>
109
+      rateLimiter.acquire
110
+      indexQueue put rec
111
+    }
105 112
 
106 113
   private def doInLock(f : => Unit) {
107 114
     try {
@@ -161,7 +168,7 @@ object Indexer extends Logging {
161 168
             }
162 169
           }
163 170
 
164
-          if (indexRecBatch.size > windowSize) {
171
+          if (indexRecBatch.size >= windowSize) {
165 172
             indexRecBatch.slice(indexRecBatch.length - 2 * ContextSize, indexRecBatch.length)
166 173
             .zipWithIndex
167 174
             .map { r => if (r._2 < ContextSize) r._1.copy(indexed = true) else r._1 }
@@ -170,7 +177,7 @@ object Indexer extends Logging {
170 177
         }
171 178
       }
172 179
     }
173
-    flushFuture = schedule(0, 10, TimeUnit.SECONDS) {
180
+    flushFuture = schedule(0, FlushDurationSecs, TimeUnit.SECONDS) {
174 181
       doInLock(flush)
175 182
     }
176 183
   }

+ 2
- 2
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala View File

@@ -11,9 +11,9 @@ import scala.collection.mutable.Buffer
11 11
 import org.apache.lucene.analysis.Analyzer
12 12
 import org.apache.lucene.queries.ChainedFilter
13 13
 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser
14
-import org.apache.lucene.search.{ BooleanClause, BooleanQuery,  Filter, FilteredQuery,
14
+import org.apache.lucene.search.{ BooleanClause, BooleanQuery, Filter, FilteredQuery,
15 15
                                   NumericRangeFilter, Query, QueryWrapperFilter, SearcherFactory,
16
-                                  SearcherManager,  Sort, SortField, TermQuery }
16
+                                  SearcherManager, Sort, SortField, TermQuery }
17 17
 import org.apache.lucene.store.FSDirectory
18 18
 
19 19
 import com.typesafe.scalalogging.slf4j.Logging

Loading…
Cancel
Save