
Added rate limiter for indexing

master
Abhinav Sarkar 2013-05-21 14:11:04 +05:30
Parent c8dcc52766
Commit f577cd5fbd
3 changed files with 21 additions and 8 deletions

10
pom.xml
View File

@@ -11,6 +11,7 @@
 <maven.compiler.target>1.6</maven.compiler.target>
 <encoding>UTF-8</encoding>
 <scala.version>2.10.0</scala.version>
+<scala.majorversion>2.10</scala.majorversion>
 <lucene.version>4.3.0</lucene.version>
 <project.dependencyDir>${project.build.directory}/dependency</project.dependencyDir>
 </properties>
@@ -39,7 +40,7 @@
 </dependency>
 <dependency>
 <groupId>com.typesafe</groupId>
-<artifactId>scalalogging-slf4j_2.10</artifactId>
+<artifactId>scalalogging-slf4j_${scala.majorversion}</artifactId>
 <version>1.0.1</version>
 </dependency>
 <dependency>
@@ -50,7 +51,7 @@
 </dependency>
 <dependency>
 <groupId>net.liftweb</groupId>
-<artifactId>lift-json_2.10</artifactId>
+<artifactId>lift-json_${scala.majorversion}</artifactId>
 <version>2.5-RC5</version>
 </dependency>
 <dependency>
@@ -78,6 +79,11 @@
 <artifactId>opencsv</artifactId>
 <version>2.3</version>
 </dependency>
+<dependency>
+<groupId>com.google.guava</groupId>
+<artifactId>guava</artifactId>
+<version>14.0.1</version>
+</dependency>
 </dependencies>
 <build>
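The Guava 14.0.1 dependency added above provides com.google.common.util.concurrent.RateLimiter, which the indexer below uses to throttle intake. As a rough sketch of the API (this demo object and its numbers are illustrative only, not part of the commit): RateLimiter.create(n) hands out permits at roughly n per second, acquire() blocks until a permit is free, and tryAcquire() checks without blocking.

import com.google.common.util.concurrent.RateLimiter

// Illustrative only: shows the blocking, smoothed behaviour of Guava's RateLimiter.
object RateLimiterDemo {
  def main(args: Array[String]): Unit = {
    val limiter = RateLimiter.create(2.0)      // ~2 permits per second
    for (i <- 1 to 6) {
      val waited = limiter.acquire()           // blocks; returns seconds spent waiting
      println(f"request $i%d waited $waited%.2f s")
    }
    println(s"permit free right now: ${limiter.tryAcquire()}")   // non-blocking probe
  }
}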

View File

@@ -20,6 +20,7 @@ import org.apache.lucene.index.{ IndexWriter, IndexWriterConfig }
 import org.apache.lucene.store.FSDirectory
 import org.apache.lucene.util.Version
+import com.google.common.util.concurrent.RateLimiter
 import com.typesafe.scalalogging.slf4j.Logging
 import net.abhinavsarkar.ircsearch.model._
@@ -49,10 +50,13 @@ object Indexer extends Logging {
 val ContextSize = 2
 val ContextDurationSecs = 20
 val IndexingDurationSecs = 10
+val FlushDurationSecs = 60
+val RateLimitPerSec = 1000
-private val indexQueue = new PriorityBlockingQueue[IndexRecord](10000)
+private val indexQueue = new PriorityBlockingQueue[IndexRecord]
 private val scheduler = Executors.newScheduledThreadPool(2)
 private val runLock = new ReentrantLock
+private val rateLimiter = RateLimiter.create(RateLimitPerSec)
 private var indexingFuture : Future[_] = null
 private var flushFuture : Future[_] = null
@@ -101,7 +105,10 @@ object Indexer extends Logging {
 s"index-$server-$channel-$botName"
 def index(indexRequest : IndexRequest) =
-IndexRecord.fromIndexRequest(indexRequest).foreach(indexQueue.put)
+IndexRecord.fromIndexRequest(indexRequest).foreach { rec =>
+  rateLimiter.acquire
+  indexQueue put rec
+}
 private def doInLock(f : => Unit) {
 try {
@@ -161,7 +168,7 @@
 }
 }
-if (indexRecBatch.size > windowSize) {
+if (indexRecBatch.size >= windowSize) {
 indexRecBatch.slice(indexRecBatch.length - 2 * ContextSize, indexRecBatch.length)
 .zipWithIndex
 .map { r => if (r._2 < ContextSize) r._1.copy(indexed = true) else r._1 }
@@ -170,7 +177,7 @@
 }
 }
 }
-flushFuture = schedule(0, 10, TimeUnit.SECONDS) {
+flushFuture = schedule(0, FlushDurationSecs, TimeUnit.SECONDS) {
 doInLock(flush)
 }
 }
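Taken together, the Indexer changes above make index block on rateLimiter.acquire before each put, so callers are slowed to RateLimitPerSec (1000 records/sec) and the queue can safely become unbounded; the periodic flush now runs every FlushDurationSecs (60 s) instead of a hard-coded 10. A minimal self-contained sketch of that throttled-producer pattern (Record and ThrottledIndexQueue are placeholder names for this example, not the project's real classes):

import java.util.concurrent.PriorityBlockingQueue
import com.google.common.util.concurrent.RateLimiter

// Placeholder element type; the project uses IndexRecord with its own ordering.
case class Record(timestamp: Long) extends Comparable[Record] {
  def compareTo(other: Record): Int = timestamp compareTo other.timestamp
}

object ThrottledIndexQueue {
  private val RateLimitPerSec = 1000                      // same constant as in the diff
  private val rateLimiter = RateLimiter.create(RateLimitPerSec)
  private val queue = new PriorityBlockingQueue[Record]   // unbounded, as after this commit

  def put(rec: Record): Unit = {
    rateLimiter.acquire()   // wait for a permit: intake never exceeds ~1000 records/sec
    queue.put(rec)
  }

  def main(args: Array[String]): Unit = {
    (1 to 3000).foreach(i => put(Record(i)))   // takes roughly 3 seconds at this rate
    println(s"queued ${queue.size} records")
  }
}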

View File

@@ -11,9 +11,9 @@ import scala.collection.mutable.Buffer
 import org.apache.lucene.analysis.Analyzer
 import org.apache.lucene.queries.ChainedFilter
 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser
-import org.apache.lucene.search.{ BooleanClause, BooleanQuery, Filter, FilteredQuery,
+import org.apache.lucene.search.{ BooleanClause, BooleanQuery, Filter, FilteredQuery,
 NumericRangeFilter, Query, QueryWrapperFilter, SearcherFactory,
-SearcherManager, Sort, SortField, TermQuery }
+SearcherManager, Sort, SortField, TermQuery }
 import org.apache.lucene.store.FSDirectory
 import com.typesafe.scalalogging.slf4j.Logging