Browse Source

Added support for pagination and for constraining search by timestamp range. Performance improvement in searching.

Abhinav Sarkar 7 years ago
parent
commit
613e916172

+ 25
- 16
src/main/scala/net/abhinavsarkar/ircsearch/Server.scala View File

@@ -187,22 +187,31 @@ class IndexHandler(indexer: Indexer) extends HttpRequestHandler {
187 187
 object SearchHandler extends HttpRequestHandler {
188 188
   implicit val formats = DefaultFormats
189 189
   override def messageReceived(ctx: ChannelHandlerContext, request: HttpRequest) {
190
-    val method = request.getMethod()
191
-    val searchRequest = if (HttpMethod.POST.equals(method)) {
192
-      val content = request.getContent().toString(Charset.forName("UTF-8"))
193
-      Serialization.read[SearchRequest](content)
194
-    } else if (HttpMethod.GET.equals(method)) {
195
-      val params = new QueryStringDecoder(request.getUri).getParameters
196
-      val server = params("server")(0)
197
-      val channel = params("channel")(0)
198
-      val botName = params("botName")(0)
199
-      val query = params("query")(0)
200
-      new SearchRequest(server, channel, botName, query)
201
-    } else {
202
-      throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
203
-    }
190
+    future {
191
+      val method = request.getMethod()
192
+      val searchRequest = if (HttpMethod.POST.equals(method)) {
193
+        val content = request.getContent().toString(Charset.forName("UTF-8"))
194
+        Serialization.read[SearchRequest](content)
195
+      } else if (HttpMethod.GET.equals(method)) {
196
+        val params = new QueryStringDecoder(request.getUri).getParameters.toMap
197
+        val server = params("server")(0)
198
+        val channel = params("channel")(0)
199
+        val botName = params("botName")(0)
200
+        val query = params("query")(0)
201
+        val page = params.get("page").collect({ case l => l.get(0) })
202
+        val pageSize = params.get("pageSize").collect({ case l => l.get(0) })
203
+        var sr = new SearchRequest(server, channel, botName, query)
204
+        if (page.isDefined)
205
+          sr = sr.copy(page = page.get.toInt)
206
+        if (pageSize.isDefined)
207
+          sr = sr.copy(pageSize = pageSize.get.toInt)
208
+        sr
209
+      } else {
210
+        throw new UnsupportedOperationException("HTTP method " + method + " is not supported")
211
+      }
204 212
 
205
-    val searchResult = Searcher.search(searchRequest)
206
-    logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
213
+      val searchResult = Searcher.search(searchRequest)
214
+      logRequest(ctx, request, sendSuccess(ctx, request, Serialization.write(searchResult)))
215
+    } onFailure { case e : Exception => logger.error("Error", e) }
207 216
   }
208 217
 }

+ 2
- 2
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Indexer.scala View File

@@ -32,12 +32,12 @@ class Indexer extends Logging {
32 32
 
33 33
   import Indexer._
34 34
 
35
-  private val indexQueue = new LinkedBlockingQueue[IndexRequest]
35
+  private val indexQueue = new LinkedBlockingQueue[IndexRequest](10000)
36 36
   private val scheduler = Executors.newSingleThreadScheduledExecutor
37 37
   private val runLock = new ReentrantLock
38 38
   private var runFuture : Future[_] = null
39 39
 
40
-  def index(indexRequest : IndexRequest) = indexQueue.offer(indexRequest)
40
+  def index(indexRequest : IndexRequest) = indexQueue.put(indexRequest)
41 41
 
42 42
   def start {
43 43
     logger.info("Starting indexer")

+ 77
- 29
src/main/scala/net/abhinavsarkar/ircsearch/lucene/Searcher.scala View File

@@ -1,19 +1,25 @@
1 1
 package net.abhinavsarkar.ircsearch.lucene
2 2
 
3 3
 import java.io.File
4
+import java.text.ParseException
5
+import java.text.SimpleDateFormat
4 6
 
5 7
 import scala.collection.JavaConversions._
6 8
 import scala.collection.mutable
9
+import scala.collection.mutable.Buffer
7 10
 
8 11
 import org.apache.lucene.analysis.Analyzer
9
-import org.apache.lucene.index.IndexReader
12
+import org.apache.lucene.queries.ChainedFilter
10 13
 import org.apache.lucene.queryparser.classic.QueryParser
11 14
 import org.apache.lucene.search.BooleanClause
12 15
 import org.apache.lucene.search.BooleanQuery
16
+import org.apache.lucene.search.Filter
13 17
 import org.apache.lucene.search.FilteredQuery
14
-import org.apache.lucene.search.IndexSearcher
18
+import org.apache.lucene.search.NumericRangeFilter
15 19
 import org.apache.lucene.search.Query
16 20
 import org.apache.lucene.search.QueryWrapperFilter
21
+import org.apache.lucene.search.SearcherFactory
22
+import org.apache.lucene.search.SearcherManager
17 23
 import org.apache.lucene.search.Sort
18 24
 import org.apache.lucene.search.SortField
19 25
 import org.apache.lucene.search.TermQuery
@@ -25,41 +31,75 @@ import net.abhinavsarkar.ircsearch.model._
25 31
 
26 32
 object Searcher extends Logging {
27 33
 
28
-  private def mkIndexSearcher(dirPath : String) : IndexSearcher = {
29
-    val indexDir = new File(dirPath)
30
-    assert(indexDir.exists && indexDir.isDirectory)
34
+  val MaxHits = 1000
31 35
 
32
-    new IndexSearcher(IndexReader.open(FSDirectory.open(indexDir)))
36
+  val readers = mutable.Map[String, SearcherManager]()
37
+
38
+  private def mkIndexSearcher(dirPath : String) : SearcherManager = {
39
+    synchronized {
40
+      if (!(readers contains dirPath)) {
41
+        val indexDir = new File(dirPath)
42
+        assert(indexDir.exists && indexDir.isDirectory)
43
+
44
+        val dir = FSDirectory.open(indexDir)
45
+        readers += (dirPath -> new SearcherManager(dir, new SearcherFactory))
46
+
47
+      }
48
+    }
49
+
50
+    readers(dirPath)
33 51
   }
34 52
 
35 53
   private def mkQueryParser(analyzer : Analyzer) =
36 54
     new QueryParser(Indexer.LUCENE_VERSION, ChatLine.MSG, analyzer)
37 55
 
38
-  private def filterifyQuery(query : Query, mustFields : Set[String]) : Query =
56
+  private def filterifyQuery(query : Query) : Query =
39 57
     query match {
40 58
       case boolQuery: BooleanQuery => {
41 59
         val newQuery = new BooleanQuery
42
-        val filterQuery = new BooleanQuery
60
+        val filters = Buffer[Filter]()
43 61
         for (clause <- boolQuery.getClauses) {
44 62
           val subQuery = clause.getQuery
45 63
           if (subQuery.isInstanceOf[TermQuery]) {
46 64
             val termQuery = subQuery.asInstanceOf[TermQuery]
47 65
             val field = termQuery.getTerm.field
48
-            if (mustFields contains field) {
49
-              clause.setOccur(BooleanClause.Occur.MUST)
50
-              filterQuery.add(clause)
51
-            } else {
52
-              newQuery.add(clause)
66
+            val sdf = new SimpleDateFormat("yyMMdd")
67
+            field match {
68
+              case ChatLine.USER => {
69
+                val filterQuery = new BooleanQuery
70
+                clause.setOccur(BooleanClause.Occur.MUST)
71
+                filterQuery.add(clause)
72
+                filters += new QueryWrapperFilter(filterQuery)
73
+              }
74
+              case "before" => {
75
+                  try {
76
+                    val ts = sdf.parse(termQuery.getTerm.text).getTime
77
+                    filters += NumericRangeFilter.newLongRange(
78
+                        ChatLine.TS, 0, ts, true, true)
79
+                  } catch {
80
+                    case e : ParseException => {}
81
+                  }
82
+              }
83
+              case "after" => {
84
+                try {
85
+                  val ts = sdf.parse(termQuery.getTerm.text).getTime
86
+                  filters += NumericRangeFilter.newLongRange(
87
+                      ChatLine.TS, ts, java.lang.Long.MAX_VALUE, true, true)
88
+                } catch {
89
+                  case e : ParseException => {}
90
+                }
91
+              }
92
+              case _ => newQuery.add(clause)
53 93
             }
54 94
           } else {
55 95
             newQuery.add(clause)
56 96
           }
57 97
         }
58 98
 
59
-        if (filterQuery.clauses.isEmpty)
99
+        if (filters.isEmpty)
60 100
           newQuery
61 101
         else
62
-          new FilteredQuery(newQuery, new QueryWrapperFilter(filterQuery))
102
+          new FilteredQuery(newQuery, new ChainedFilter(filters.toArray, ChainedFilter.AND))
63 103
       }
64 104
       case _ => query
65 105
     }
@@ -72,9 +112,9 @@ object Searcher extends Logging {
72 112
     val analyzer = Indexer.mkAnalyzer
73 113
     try {
74 114
       val queryParser = mkQueryParser(analyzer)
75
-      val query = filterifyQuery(queryParser.parse(searchRequest.query), Set(ChatLine.USER))
115
+      val query = filterifyQuery(queryParser.parse(searchRequest.query))
76 116
       logger.debug("Query: {}", query)
77
-      val (totalResults, results) = doSearch(indexDir, query, searchRequest.pageSize)
117
+      val (totalResults, results) = doSearch(indexDir, query, searchRequest.page, searchRequest.pageSize)
78 118
       val searchResults = SearchResult.fromSearchRequest(searchRequest)
79 119
         .copy(totalResults = totalResults, chatLines = results.map(_._1))
80 120
       logger.debug("Search results: {}", searchResults)
@@ -84,21 +124,29 @@ object Searcher extends Logging {
84 124
     }
85 125
   }
86 126
 
87
-  private def doSearch(indexDir : String, query : Query, maxHits : Int)
127
+  private def doSearch(indexDir : String, query : Query, page : Int, pageSize : Int)
88 128
     : (Int, List[(ChatLine, Float)]) = {
89
-    val indexSearcher = mkIndexSearcher(indexDir)
90
-    val topDocs = indexSearcher.search(query, maxHits,
91
-        new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
92
-    val docs = topDocs.scoreDocs.map { sd =>
93
-      val score = sd.score
94
-      val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
95
-        (map, field) => map += (field.name -> field.stringValue)
96
-      }
129
+    val searcherMgr = mkIndexSearcher(indexDir)
130
+    searcherMgr.maybeRefresh
131
+    val indexSearcher = searcherMgr.acquire()
132
+    try {
133
+      val topDocs = indexSearcher.search(query, MaxHits.min((page + 1) * pageSize),
134
+          new Sort(SortField.FIELD_SCORE, new SortField(ChatLine.TS, SortField.Type.LONG, true)))
135
+      val docs = topDocs.scoreDocs
136
+        .drop(page * pageSize)
137
+        .map { sd =>
138
+          val score = sd.score
139
+          val doc = indexSearcher.doc(sd.doc).getFields.foldLeft(mutable.Map[String, String]()) {
140
+            (map, field) => map += (field.name -> field.stringValue)
141
+        }
97 142
 
98
-      val chatLine = new ChatLine(doc(ChatLine.USER), doc(ChatLine.TS).toLong, doc(ChatLine.MSG))
99
-      (chatLine, score)
143
+        val chatLine = new ChatLine(doc(ChatLine.USER), doc(ChatLine.TS).toLong, doc(ChatLine.MSG))
144
+        (chatLine, score)
145
+      }
146
+      (topDocs.totalHits, docs.toList)
147
+    } finally {
148
+      searcherMgr.release(indexSearcher)
100 149
     }
101
-    (topDocs.totalHits, docs.toList)
102 150
   }
103 151
 
104 152
 }

Loading…
Cancel
Save