Added lucene indexer and searcher. Added WS endpoints for them
This commit is contained in:
parent
e68434b77b
commit
c742979d6e
35
pom.xml
35
pom.xml
@ -10,7 +10,8 @@
|
||||
<maven.compiler.source>1.6</maven.compiler.source>
|
||||
<maven.compiler.target>1.6</maven.compiler.target>
|
||||
<encoding>UTF-8</encoding>
|
||||
<scala.version>2.10.1</scala.version>
|
||||
<scala.version>2.10.0</scala.version>
|
||||
<lucene.version>4.3.0</lucene.version>
|
||||
<project.dependencyDir>${project.build.directory}/dependency</project.dependencyDir>
|
||||
</properties>
|
||||
|
||||
@ -25,6 +26,11 @@
|
||||
<artifactId>scala-reflect</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-compiler</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty</artifactId>
|
||||
@ -42,8 +48,31 @@
|
||||
<version>1.0.0</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Test -->
|
||||
<dependency>
|
||||
<groupId>net.liftweb</groupId>
|
||||
<artifactId>lift-json_2.10</artifactId>
|
||||
<version>2.5-RC5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>jline</artifactId>
|
||||
<version>2.11.0-M2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-queryparser</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
@ -36,6 +36,7 @@ trait HttpRequestHandler extends ChannelInboundMessageHandlerAdapter[HttpRequest
|
||||
protected def sendSuccess(ctx : ChannelHandlerContext, request : HttpRequest, body : String) : HttpResponse = {
|
||||
val response = new DefaultHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK)
|
||||
response.setContent(Unpooled.copiedBuffer(body.getBytes))
|
||||
response.setHeader(CONTENT_TYPE, "application/json")
|
||||
writeResponse(ctx, request, response)
|
||||
response
|
||||
}
|
||||
|
@ -1,7 +1,13 @@
|
||||
package net.abhinavsarkar.ircsearch
|
||||
|
||||
import java.net.InetSocketAddress
|
||||
import java.nio.charset.Charset
|
||||
|
||||
import scala.concurrent.ExecutionContext.Implicits._
|
||||
import scala.concurrent.future
|
||||
|
||||
import com.typesafe.scalalogging.slf4j.Logging
|
||||
|
||||
import io.netty.bootstrap.ServerBootstrap
|
||||
import io.netty.channel.ChannelHandler.Sharable
|
||||
import io.netty.channel.ChannelHandlerContext
|
||||
@ -14,11 +20,12 @@ import io.netty.handler.codec.http.HttpContentCompressor
|
||||
import io.netty.handler.codec.http.HttpRequest
|
||||
import io.netty.handler.codec.http.HttpRequestDecoder
|
||||
import io.netty.handler.codec.http.HttpResponseEncoder
|
||||
import io.netty.handler.codec.http.DefaultHttpResponse
|
||||
import io.netty.handler.codec.http.HttpVersion
|
||||
import io.netty.handler.codec.http.HttpResponseStatus
|
||||
import io.netty.buffer.Unpooled
|
||||
import java.nio.charset.Charset
|
||||
import net.abhinavsarkar.ircsearch.lucene.Indexer
|
||||
import net.abhinavsarkar.ircsearch.lucene.Searcher
|
||||
import net.abhinavsarkar.ircsearch.model.IndexRequest
|
||||
import net.abhinavsarkar.ircsearch.model.SearchRequest
|
||||
import net.liftweb.json.DefaultFormats
|
||||
import net.liftweb.json.Serialization
|
||||
|
||||
object Server extends App with Logging {
|
||||
|
||||
@ -31,13 +38,12 @@ object Server extends App with Logging {
|
||||
|
||||
val httpRequestRouter = new HttpRequestRouter {
|
||||
val Echo = "^/echo$".r
|
||||
val Index = "^/index$".r
|
||||
val Search = "^/search$".r
|
||||
def route = {
|
||||
case Echo() => new HttpRequestHandler {
|
||||
override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
|
||||
val content = request.getContent().toString(Charset.forName("UTF-8"))
|
||||
logRequest(ctx, request, sendSuccess(ctx, request, content))
|
||||
}
|
||||
}
|
||||
case Echo() => EchoHandler
|
||||
case Index() => IndexHandler
|
||||
case Search() => SearchHandler
|
||||
}
|
||||
}
|
||||
|
||||
@ -58,7 +64,8 @@ object Server extends App with Logging {
|
||||
Runtime.getRuntime.addShutdownHook(
|
||||
new Thread("ShutdownHook") {
|
||||
override def run {
|
||||
stopServer(server);
|
||||
stopServer(server)
|
||||
IndexHandler.stop
|
||||
}
|
||||
})
|
||||
|
||||
@ -67,7 +74,8 @@ object Server extends App with Logging {
|
||||
} catch {
|
||||
case e : Exception => {
|
||||
logger.error("Exception while running server. Stopping server", e)
|
||||
stopServer(server);
|
||||
stopServer(server)
|
||||
IndexHandler.stop
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -78,4 +86,38 @@ object Server extends App with Logging {
|
||||
logger.info("Stopped server")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Sharable
object EchoHandler extends HttpRequestHandler {
  /** Echoes the request body back verbatim as a successful response. */
  override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
    val body = request.getContent().toString(Charset.forName("UTF-8"))
    val response = sendSuccess(ctx, request, body)
    logRequest(ctx, request, response)
  }
}
|
||||
|
||||
@Sharable
object IndexHandler extends HttpRequestHandler {
  // Formats used by lift-json to deserialize the request body.
  implicit val formats = DefaultFormats
  // Indexer is created and started lazily on the first index request;
  // it is stopped from the server's shutdown hook via `stop` below.
  lazy val indexer = { val indexer = new Indexer; indexer.start; indexer }
  override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
    // Parsing and enqueueing run asynchronously; the response is sent
    // immediately, without waiting for the request to actually be indexed.
    // NOTE(review): any failure inside this future (e.g. malformed JSON) is
    // silently dropped — consider attaching an onFailure handler. Also uses
    // the global ExecutionContext (imported at the top of the file).
    future {
      val content = request.getContent().toString(Charset.forName("UTF-8"))
      val indexRequest = Serialization.read[IndexRequest](content)
      indexer.index(indexRequest)
    }
    logRequest(ctx, request, sendDefaultResponse(ctx, request))
  }
  // Stops the lazily-started indexer; note this forces `indexer` if it was
  // never used before.
  def stop = indexer.stop
}
|
||||
|
||||
@Sharable
object SearchHandler extends HttpRequestHandler {
  // Formats used by lift-json for (de)serialization.
  implicit val formats = DefaultFormats
  /** Parses the body as a SearchRequest, runs the search synchronously on the
    * IO thread, and replies with the serialized SearchResult. */
  override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
    val body = request.getContent().toString(Charset.forName("UTF-8"))
    val searchResult = Searcher.search(Serialization.read[SearchRequest](body))
    logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
  }
}
|
||||
|
138
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Indexer.scala
Normal file
138
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Indexer.scala
Normal file
@ -0,0 +1,138 @@
|
||||
package net.abhinavsarkar.ircsearch.lucene
|
||||
|
||||
import java.io.File
|
||||
import java.util.ArrayList
|
||||
import java.util.concurrent.Executors
|
||||
import java.util.concurrent.Future
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.locks.ReentrantLock
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer
|
||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer
|
||||
import org.apache.lucene.document.Field
|
||||
import org.apache.lucene.document.FieldType
|
||||
import org.apache.lucene.document.FieldType.NumericType
|
||||
import org.apache.lucene.index.IndexReader
|
||||
import org.apache.lucene.index.IndexWriter
|
||||
import org.apache.lucene.index.IndexWriterConfig
|
||||
import org.apache.lucene.search.IndexSearcher
|
||||
import org.apache.lucene.store.FSDirectory
|
||||
import org.apache.lucene.util.Version
|
||||
|
||||
import com.typesafe.scalalogging.slf4j.Logging
|
||||
|
||||
import net.abhinavsarkar.ircsearch.model.IndexRequest
|
||||
|
||||
class Indexer extends Logging {

  import Indexer._

  // Pending index requests; drained in batches by the scheduled task below.
  private val indexQueue = new LinkedBlockingQueue[IndexRequest]
  private val scheduler = Executors.newSingleThreadScheduledExecutor
  // Guards against `stop` racing with an in-flight indexing run.
  private val runLock = new ReentrantLock
  private var runFuture : Future[_] = null

  /** Enqueues a request for indexing; the actual write happens asynchronously
    * on the scheduler thread. */
  def index(indexRequest : IndexRequest) = indexQueue.offer(indexRequest)

  /** Starts the background task that drains the queue every 10 seconds. */
  def start {
    logger.info("Starting indexer")
    runFuture = scheduler.scheduleWithFixedDelay(
      new Runnable {
        def run {
          try {
            runLock.lock
            logger.debug("Running indexer")
            val indexReqs = new ArrayList[IndexRequest]
            indexQueue.drainTo(indexReqs)
            doIndex(indexReqs.toList)
          } catch {
            // Deliberately catches Throwable: an exception escaping run()
            // would cancel the periodic task permanently.
            case e : Throwable => logger.error("Exception while running indexer", e)
          } finally {
            runLock.unlock
          }
        }},
      0, 10, TimeUnit.SECONDS)
  }

  /** Stops the background task and shuts the scheduler down.
    * Requests still sitting in the queue at this point are dropped. */
  def stop {
    try {
      runLock.lock
      if (runFuture != null) {
        runFuture.cancel(false)
        runFuture = null
      }
      // BUGFIX: shut down the executor as well — cancelling the future alone
      // leaves the (non-daemon) scheduler thread alive, keeping the JVM from
      // exiting cleanly.
      scheduler.shutdown
      logger.info("Stopped indexer")
    } finally {
      runLock.unlock
    }
  }

  // Writes a batch of requests to their per-(server, channel, bot) indices.
  private def doIndex(indexReqs: List[IndexRequest]) {
    // One Lucene index per (server, channel, botName) triple.
    val indexRequests = indexReqs.groupBy { r =>
      (r.server, r.channel, r.botName)
    }

    for (((server, channel, botName), indexRequestBatch) <- indexRequests) {
      val indexDir = getIndexDir(server, channel, botName)
      val analyzer = mkAnalyzer
      val indexWriter = mkIndexWriter(indexDir, analyzer)
      try {
        for (indexRequest <- indexRequestBatch;
            chatLine <- indexRequest.chatLines) {
          // timestamp is stored untokenized so it round-trips exactly.
          val tsField = mkField("timestamp", chatLine.timestamp.toString, false)
          val userField = mkField("user", chatLine.user, true)
          val msgField = mkField("message", chatLine.message)
          indexWriter.addDocument(List(tsField, userField, msgField), analyzer)
          logger.debug("Indexed : [{} {} {}] [{}] {}: {}",
            server, channel, botName, chatLine.timestamp.toString, chatLine.user, chatLine.message)
        }
      } finally {
        indexWriter.close
        analyzer.close
      }
    }
  }

}
|
||||
|
||||
object Indexer {

  /** Lucene version every analyzer/writer in this app is built against. */
  val LUCENE_VERSION = Version.LUCENE_43

  /** Builds the analyzer used for indexing (and reused by the searcher):
    * exact matching on "user", English stemming on "message", and the
    * StandardAnalyzer for every other field. */
  def mkAnalyzer : Analyzer = {
    val perFieldAnalyzers = Map(
      "user" -> new KeywordAnalyzer,
      "message" -> new EnglishAnalyzer(LUCENE_VERSION))
    new PerFieldAnalyzerWrapper(new StandardAnalyzer(LUCENE_VERSION), perFieldAnalyzers)
  }

  // Opens (creating if absent) an IndexWriter over the given directory path.
  private def mkIndexWriter(dirPath : String, analyzer : Analyzer) : IndexWriter = {
    val dir = new File(dirPath)
    if (dir.exists)
      assert(dir.isDirectory)
    val config = new IndexWriterConfig(LUCENE_VERSION, analyzer)
    new IndexWriter(FSDirectory.open(dir), config)
  }

  /** Naming convention for index directories: one per (server, channel, bot). */
  def getIndexDir(server : String, channel : String, botName : String) : String =
    s"index-$server-$channel-$botName"

  // Creates a stored + indexed field; tokenization and an optional numeric
  // type are configurable, everything else is fixed.
  private def mkField(name : String, value : String,
      tokenized : Boolean = true, numericType : Option[NumericType] = None) : Field = {
    val fieldType = new FieldType
    fieldType.setStored(true)
    fieldType.setIndexed(true)
    fieldType.setTokenized(tokenized)
    numericType foreach fieldType.setNumericType
    new Field(name, value, fieldType)
  }

}
|
103
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala
Normal file
103
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala
Normal file
@ -0,0 +1,103 @@
|
||||
package net.abhinavsarkar.ircsearch.lucene
|
||||
|
||||
import com.typesafe.scalalogging.slf4j.Logging
|
||||
import org.apache.lucene.search.IndexSearcher
|
||||
import java.io.File
|
||||
import org.apache.lucene.index.IndexReader
|
||||
import org.apache.lucene.store.FSDirectory
|
||||
import org.apache.lucene.analysis.Analyzer
|
||||
import org.apache.lucene.queryparser.classic.QueryParser
|
||||
import org.apache.lucene.search.Query
|
||||
import scala.collection.immutable.Set
|
||||
import org.apache.lucene.search.BooleanQuery
|
||||
import org.apache.lucene.search.TermQuery
|
||||
import org.apache.lucene.search.BooleanClause
|
||||
import org.apache.lucene.search.QueryWrapperFilter
|
||||
import org.apache.lucene.search.Filter
|
||||
import net.abhinavsarkar.ircsearch.model.SearchRequest
|
||||
import net.abhinavsarkar.ircsearch.model.SearchResult
|
||||
import org.apache.lucene.search.Sort
|
||||
import org.apache.lucene.search.SortField
|
||||
import scala.collection.JavaConversions._
|
||||
import scala.collection.mutable
|
||||
import net.abhinavsarkar.ircsearch.model.ChatLine
|
||||
import net.abhinavsarkar.ircsearch.model.ChatLine
|
||||
import net.abhinavsarkar.ircsearch.model.SearchResult
|
||||
import net.abhinavsarkar.ircsearch.model.SearchResult
|
||||
|
||||
object Searcher extends Logging {

  // Opens a searcher over the given index directory. The caller is
  // responsible for closing the underlying reader (see doSearch's finally).
  private def mkIndexSearcher(dirPath : String) : IndexSearcher = {
    val indexDir = new File(dirPath)
    assert(indexDir.exists && indexDir.isDirectory)

    new IndexSearcher(IndexReader.open(FSDirectory.open(indexDir)))
  }

  // Query parser defaulting unqualified terms to the "message" field.
  private def mkQueryParser(analyzer : Analyzer) =
    new QueryParser(Indexer.LUCENE_VERSION, "message", analyzer)

  // Splits a parsed boolean query: TermQuery clauses on `mustFields` are
  // moved into a filter (exact match, no scoring); everything else stays in
  // the scored query. Non-boolean queries pass through untouched.
  private def filterifyQuery(query : Query, mustFields : Set[String]) : (Query, Option[Filter]) =
    query match {
      case boolQuery: BooleanQuery => {
        val newQuery = new BooleanQuery
        val filterQuery = new BooleanQuery
        for (clause <- boolQuery.getClauses) {
          clause.getQuery match {
            case termQuery : TermQuery if mustFields contains termQuery.getTerm.field =>
              clause.setOccur(BooleanClause.Occur.MUST)
              filterQuery.add(clause)
            case _ => newQuery.add(clause)
          }
        }

        (newQuery, if (filterQuery.clauses.isEmpty) None else Some(new QueryWrapperFilter(filterQuery)))
      }
      case _ => (query, None)
    }

  /** Runs a search against the index for the request's (server, channel, bot),
    * honoring the request's page and pageSize, and returns a SearchResult
    * carrying the total hit count and the matching chat lines. */
  def search(searchRequest : SearchRequest) : SearchResult = {
    logger.debug("Searching : [{} {} {}] {}",
      searchRequest.server, searchRequest.channel, searchRequest.botName, searchRequest.query)

    val indexDir = Indexer.getIndexDir(searchRequest.server, searchRequest.channel, searchRequest.botName)
    val analyzer = Indexer.mkAnalyzer
    try {
      val queryParser = mkQueryParser(analyzer)
      // "user" terms become exact-match filters rather than scored clauses.
      val (query, filter) = filterifyQuery(queryParser.parse(searchRequest.query), Set("user"))
      logger.debug("Query: {}, Filter: {}", query, filter)
      val (totalResults, results) =
        doSearch(indexDir, query, filter, searchRequest.page, searchRequest.pageSize)
      val searchResults = SearchResult.fromSearchRequest(searchRequest)
        .copy(totalResults = totalResults, chatLines = results.map(_._1))
      logger.debug("Search results: {}", searchResults)
      searchResults
    } finally {
      analyzer.close
    }
  }

  // Executes the query, sorted by score then most-recent timestamp, and
  // returns (total hits, the requested page of (chatLine, score) pairs).
  private def doSearch(indexDir : String, query : Query, filter : Option[Filter],
      page : Int, pageSize : Int) : (Int, List[(ChatLine, Float)]) = {
    val indexSearcher = mkIndexSearcher(indexDir)
    try {
      // BUGFIX: the requested page was previously ignored — page 0 was always
      // returned. Fetch enough hits to cover the page, then drop the earlier
      // pages.
      val maxHits = (page + 1) * pageSize
      val topDocs = indexSearcher.search(query, filter.orNull, maxHits,
        new Sort(SortField.FIELD_SCORE, new SortField("timestamp", SortField.Type.LONG, true)))
      val docs = topDocs.scoreDocs.drop(page * pageSize).map { sd =>
        val score = sd.score
        val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
          (map, field) => map += (field.name -> field.stringValue)
        }

        val chatLine = new ChatLine(doc("user"), doc("timestamp").toLong, doc("message"))
        (chatLine, score)
      }
      (topDocs.totalHits, docs.toList)
    } finally {
      // BUGFIX: the reader was opened per search and never closed, leaking
      // file handles on every request.
      indexSearcher.getIndexReader.close
    }
  }

}
|
23
src/main/scala/net/abhinavsarkar/ircsearch/model.scala
Normal file
23
src/main/scala/net/abhinavsarkar/ircsearch/model.scala
Normal file
@ -0,0 +1,23 @@
|
||||
package net.abhinavsarkar.ircsearch.model
|
||||
|
||||
|
||||
/** A single IRC message: who said what, and when (epoch millis per callers —
  * NOTE(review): unit not visible here, confirm against the bot). */
final case class ChatLine(user : String, timestamp : Long, message : String)

/** A batch of chat lines to index for one (server, channel, bot) triple. */
final case class IndexRequest(
    server : String, channel : String, botName : String, chatLines : List[ChatLine])

/** A paged search over one (server, channel, bot) index; page is 0-based. */
final case class SearchRequest(
    server : String, channel : String, botName : String, query: String,
    page : Int = 0, pageSize : Int = 10)

/** Search response: echoes the request's coordinates plus the total hit
  * count and the matching lines for the requested page. */
final case class SearchResult(
    server : String, channel : String, botName : String, query: String,
    page : Int, pageSize : Int, totalResults : Int, chatLines : List[ChatLine])

object SearchResult {
  /** Builds an empty result (0 hits, no lines) echoing the request's fields. */
  def fromSearchRequest(searchRequest : SearchRequest) : SearchResult = {
    import searchRequest._
    SearchResult(server, channel, botName, query, page, pageSize, 0, List.empty)
  }
}
|
||||
|
Loading…
Reference in New Issue
Block a user