Added selective analyzer for selective stemming
This commit is contained in:
parent
2805b6b634
commit
48f25c29d4
@ -8,4 +8,5 @@
|
|||||||
[org.clojure/clojure-contrib "1.2.0"]
|
[org.clojure/clojure-contrib "1.2.0"]
|
||||||
[org.apache.lucene/lucene-core "3.1.0"]
|
[org.apache.lucene/lucene-core "3.1.0"]
|
||||||
[org.apache.lucene/lucene-wordnet "3.1.0"]
|
[org.apache.lucene/lucene-wordnet "3.1.0"]
|
||||||
|
[org.apache.lucene/lucene-analyzers "3.1.0"]
|
||||||
[org/pircbotx "1.3-SNAPSHOT"]])
|
[org/pircbotx "1.3-SNAPSHOT"]])
|
||||||
|
@ -12,7 +12,11 @@
|
|||||||
|
|
||||||
(def *chat-log* (atom []))
|
(def *chat-log* (atom []))
|
||||||
|
|
||||||
(def *analyzer* (standard-analyzer))
|
(def *analyzer*
|
||||||
|
(logging-analyzer
|
||||||
|
(selective-analyzer
|
||||||
|
(stemmer-analyzer (standard-analyzer))
|
||||||
|
#{"message"})))
|
||||||
|
|
||||||
(def *max-hits* 3)
|
(def *max-hits* 3)
|
||||||
|
|
||||||
@ -35,17 +39,18 @@
|
|||||||
|
|
||||||
(defn search-chat-log [index-searcher query-str max-hits analyzer]
|
(defn search-chat-log [index-searcher query-str max-hits analyzer]
|
||||||
(let [qp (query-parser :message analyzer)
|
(let [qp (query-parser :message analyzer)
|
||||||
[query filter] (filterify-query (parse-query qp query-str) #{"user"})
|
raw-query (parse-query qp query-str)
|
||||||
|
[query filter] (filterify-query raw-query #{"user"})
|
||||||
hits (search index-searcher query filter max-hits)]
|
hits (search index-searcher query filter max-hits)]
|
||||||
(println ">>" (count hits) "hits for query:" query)
|
(println "Query:" query)
|
||||||
(println query)
|
(println "Filter:" filter)
|
||||||
(println filter)
|
(println ">>" (count hits) "hits for query:" query-str)
|
||||||
(map
|
(map
|
||||||
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))]
|
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))
|
||||||
|
delta (floor (/ (- (System/currentTimeMillis) timestamp) 1000))]
|
||||||
(format
|
(format
|
||||||
"[%s] %s: %s"
|
"[%s] %s: %s"
|
||||||
(fuzzy-relative-time
|
(fuzzy-relative-time delta)
|
||||||
(floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
|
|
||||||
(-> % :doc :user)
|
(-> % :doc :user)
|
||||||
(-> % :doc :message)))
|
(-> % :doc :message)))
|
||||||
hits)))
|
hits)))
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
(ns irc-search-bot.lucene
|
(ns irc-search-bot.lucene
|
||||||
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
|
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
|
||||||
[org.apache.lucene.store RAMDirectory FSDirectory]
|
[org.apache.lucene.store RAMDirectory FSDirectory]
|
||||||
|
[org.apache.lucene.analysis Analyzer KeywordTokenizer]
|
||||||
[org.apache.lucene.analysis.standard StandardAnalyzer]
|
[org.apache.lucene.analysis.standard StandardAnalyzer]
|
||||||
[org.apache.lucene.util Version]
|
[org.apache.lucene.util Version]
|
||||||
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
|
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
|
||||||
@ -38,7 +39,10 @@
|
|||||||
(.add filter-query clause))
|
(.add filter-query clause))
|
||||||
(do
|
(do
|
||||||
(.add new-query clause)))))
|
(.add new-query clause)))))
|
||||||
[new-query (QueryWrapperFilter. filter-query)])
|
[new-query
|
||||||
|
(if (.isEmpty (.clauses filter-query))
|
||||||
|
nil
|
||||||
|
(QueryWrapperFilter. filter-query))])
|
||||||
[query, nil]))
|
[query, nil]))
|
||||||
|
|
||||||
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
|
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
|
||||||
@ -97,5 +101,15 @@
|
|||||||
(defn standard-analyzer []
|
(defn standard-analyzer []
|
||||||
(StandardAnalyzer. *lucene-version*))
|
(StandardAnalyzer. *lucene-version*))
|
||||||
|
|
||||||
(defn stemmer-analyzer []
|
(defn stemmer-analyzer [delegate-analyzer]
|
||||||
(AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))
|
(AnalyzerUtil/getPorterStemmerAnalyzer delegate-analyzer))
|
||||||
|
|
||||||
|
(defn selective-analyzer [delegate-analyzer analyzable-fields]
|
||||||
|
(proxy [Analyzer] []
|
||||||
|
(tokenStream [field rdr]
|
||||||
|
(if (analyzable-fields field)
|
||||||
|
(.tokenStream delegate-analyzer field rdr)
|
||||||
|
(KeywordTokenizer. rdr)))))
|
||||||
|
|
||||||
|
(defn logging-analyzer [delegate-analyzer]
|
||||||
|
(AnalyzerUtil/getLoggingAnalyzer delegate-analyzer System/out "log"))
|
||||||
|
Loading…
Reference in New Issue
Block a user