Added selective analyzer for selective stemming
This commit is contained in:
parent
2805b6b634
commit
48f25c29d4
@@ -8,4 +8,5 @@
|
||||
[org.clojure/clojure-contrib "1.2.0"]
|
||||
[org.apache.lucene/lucene-core "3.1.0"]
|
||||
[org.apache.lucene/lucene-wordnet "3.1.0"]
|
||||
[org.apache.lucene/lucene-analyzers "3.1.0"]
|
||||
[org/pircbotx "1.3-SNAPSHOT"]])
|
||||
|
@@ -12,7 +12,11 @@
|
||||
|
||||
(def *chat-log* (atom []))
|
||||
|
||||
(def *analyzer* (standard-analyzer))
|
||||
;; Analyzer shared by this namespace, built as a chain of wrappers:
;;   standard-analyzer -> stemmer-analyzer -> selective-analyzer -> logging-analyzer.
;; The set #{"message"} is handed to selective-analyzer as the fields that
;; should go through the stemming analyzer.
(def *analyzer*
|
||||
(logging-analyzer
|
||||
(selective-analyzer
|
||||
(stemmer-analyzer (standard-analyzer))
|
||||
#{"message"})))
|
||||
|
||||
(def *max-hits* 3)
|
||||
|
||||
@@ -35,17 +39,18 @@
|
||||
|
||||
;; search-chat-log: runs a free-text search over the chat-log index.
;;   index-searcher - searcher passed through to `search`
;;   query-str      - raw query text, parsed with a query parser built for :message
;;   max-hits       - hit limit passed through to `search`
;;   analyzer       - analyzer given to the query parser
;; The parsed query is split by `filterify-query` (called with #{"user"}) into a
;; [query filter] pair, which is then searched. The query, the filter and the hit
;; count are printed, and each hit is mapped to a string formatted as
;; "[%s] %s: %s" from (fuzzy-relative-time <floor of elapsed ms / 1000>),
;; the hit's :user and its :message.
;; NOTE(review): this span shows both sides of a diff hunk (removed and added
;; lines interleaved, with `|` / `||||` rendering artifacts), so it is not a
;; single runnable form as displayed.
(defn search-chat-log [index-searcher query-str max-hits analyzer]
|
||||
(let [qp (query-parser :message analyzer)
|
||||
[query filter] (filterify-query (parse-query qp query-str) #{"user"})
|
||||
raw-query (parse-query qp query-str)
|
||||
[query filter] (filterify-query raw-query #{"user"})
|
||||
hits (search index-searcher query filter max-hits)]
|
||||
(println ">>" (count hits) "hits for query:" query)
|
||||
(println query)
|
||||
(println filter)
|
||||
(println "Query:" query)
|
||||
(println "Filter:" filter)
|
||||
(println ">>" (count hits) "hits for query:" query-str)
|
||||
(map
|
||||
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))]
|
||||
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))
|
||||
delta (floor (/ (- (System/currentTimeMillis) timestamp) 1000))]
|
||||
(format
|
||||
"[%s] %s: %s"
|
||||
(fuzzy-relative-time
|
||||
(floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
|
||||
(fuzzy-relative-time delta)
|
||||
(-> % :doc :user)
|
||||
(-> % :doc :message)))
|
||||
hits)))
|
||||
|
@@ -1,6 +1,7 @@
|
||||
(ns irc-search-bot.lucene
|
||||
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
|
||||
[org.apache.lucene.store RAMDirectory FSDirectory]
|
||||
[org.apache.lucene.analysis Analyzer KeywordTokenizer]
|
||||
[org.apache.lucene.analysis.standard StandardAnalyzer]
|
||||
[org.apache.lucene.util Version]
|
||||
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
|
||||
@@ -38,7 +39,10 @@
|
||||
(.add filter-query clause))
|
||||
(do
|
||||
(.add new-query clause)))))
|
||||
[new-query (QueryWrapperFilter. filter-query)])
|
||||
[new-query
|
||||
(if (.isEmpty (.clauses filter-query))
|
||||
nil
|
||||
(QueryWrapperFilter. filter-query))])
|
||||
[query, nil]))
|
||||
|
||||
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
|
||||
@@ -97,5 +101,15 @@
|
||||
;; Returns a new StandardAnalyzer constructed with *lucene-version*.
(defn standard-analyzer []
|
||||
(StandardAnalyzer. *lucene-version*))
|
||||
|
||||
;; Zero-argument variant: wraps a fresh (standard-analyzer) with
;; AnalyzerUtil/getPorterStemmerAnalyzer.
;; NOTE(review): presumably the removed side of this hunk; the *analyzer*
;; definition in this commit calls stemmer-analyzer with one argument.
(defn stemmer-analyzer []
|
||||
(AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))
|
||||
;; One-argument variant: wraps the supplied delegate-analyzer with
;; AnalyzerUtil/getPorterStemmerAnalyzer, so the caller chooses the base analyzer.
(defn stemmer-analyzer [delegate-analyzer]
|
||||
(AnalyzerUtil/getPorterStemmerAnalyzer delegate-analyzer))
|
||||
|
||||
;; Returns an Analyzer proxy that analyzes only selected fields.
;;   delegate-analyzer - analyzer used for the selected fields
;;   analyzable-fields - invoked as a function on the field name; a truthy
;;                       result selects the delegate (the caller in this
;;                       commit passes the set #{"message"})
;; For any other field, tokenStream returns a KeywordTokenizer over the reader
;; instead of delegating.
(defn selective-analyzer [delegate-analyzer analyzable-fields]
|
||||
(proxy [Analyzer] []
|
||||
(tokenStream [field rdr]
|
||||
(if (analyzable-fields field)
|
||||
(.tokenStream delegate-analyzer field rdr)
|
||||
(KeywordTokenizer. rdr)))))
|
||||
|
||||
;; Wraps delegate-analyzer via AnalyzerUtil/getLoggingAnalyzer, passing
;; System/out and the string "log" as the remaining arguments.
(defn logging-analyzer [delegate-analyzer]
|
||||
(AnalyzerUtil/getLoggingAnalyzer delegate-analyzer System/out "log"))
|
||||
|
Loading…
Reference in New Issue
Block a user