Added selective analyzer for selective stemming

master
Abhinav Sarkar 2011-06-01 09:27:22 +05:30
parent 2805b6b634
commit 48f25c29d4
3 changed files with 31 additions and 11 deletions

View File

@ -8,4 +8,5 @@
[org.clojure/clojure-contrib "1.2.0"] [org.clojure/clojure-contrib "1.2.0"]
[org.apache.lucene/lucene-core "3.1.0"] [org.apache.lucene/lucene-core "3.1.0"]
[org.apache.lucene/lucene-wordnet "3.1.0"] [org.apache.lucene/lucene-wordnet "3.1.0"]
[org.apache.lucene/lucene-analyzers "3.1.0"]
[org/pircbotx "1.3-SNAPSHOT"]]) [org/pircbotx "1.3-SNAPSHOT"]])

View File

@ -12,7 +12,11 @@
(def *chat-log* (atom [])) (def *chat-log* (atom []))
(def *analyzer* (standard-analyzer)) (def *analyzer*
(logging-analyzer
(selective-analyzer
(stemmer-analyzer (standard-analyzer))
#{"message"})))
(def *max-hits* 3) (def *max-hits* 3)
@ -35,17 +39,18 @@
(defn search-chat-log [index-searcher query-str max-hits analyzer] (defn search-chat-log [index-searcher query-str max-hits analyzer]
(let [qp (query-parser :message analyzer) (let [qp (query-parser :message analyzer)
[query filter] (filterify-query (parse-query qp query-str) #{"user"}) raw-query (parse-query qp query-str)
[query filter] (filterify-query raw-query #{"user"})
hits (search index-searcher query filter max-hits)] hits (search index-searcher query filter max-hits)]
(println ">>" (count hits) "hits for query:" query) (println "Query:" query)
(println query) (println "Filter:" filter)
(println filter) (println ">>" (count hits) "hits for query:" query-str)
(map (map
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))] #(let [timestamp (-> % :doc :timestamp (Long/parseLong))
delta (floor (/ (- (System/currentTimeMillis) timestamp) 1000))]
(format (format
"[%s] %s: %s" "[%s] %s: %s"
(fuzzy-relative-time (fuzzy-relative-time delta)
(floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
(-> % :doc :user) (-> % :doc :user)
(-> % :doc :message))) (-> % :doc :message)))
hits))) hits)))

View File

@ -1,6 +1,7 @@
(ns irc-search-bot.lucene (ns irc-search-bot.lucene
(:import [org.apache.lucene.document Document Field Field$Store Field$Index] (:import [org.apache.lucene.document Document Field Field$Store Field$Index]
[org.apache.lucene.store RAMDirectory FSDirectory] [org.apache.lucene.store RAMDirectory FSDirectory]
[org.apache.lucene.analysis Analyzer KeywordTokenizer]
[org.apache.lucene.analysis.standard StandardAnalyzer] [org.apache.lucene.analysis.standard StandardAnalyzer]
[org.apache.lucene.util Version] [org.apache.lucene.util Version]
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader] [org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
@ -38,7 +39,10 @@
(.add filter-query clause)) (.add filter-query clause))
(do (do
(.add new-query clause))))) (.add new-query clause)))))
[new-query (QueryWrapperFilter. filter-query)]) [new-query
(if (.isEmpty (.clauses filter-query))
nil
(QueryWrapperFilter. filter-query))])
[query, nil])) [query, nil]))
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits] (defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
@ -97,5 +101,15 @@
(defn standard-analyzer [] (defn standard-analyzer []
(StandardAnalyzer. *lucene-version*)) (StandardAnalyzer. *lucene-version*))
(defn stemmer-analyzer [] (defn stemmer-analyzer [delegate-analyzer]
(AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer))) (AnalyzerUtil/getPorterStemmerAnalyzer delegate-analyzer))
(defn selective-analyzer
  "Builds an Analyzer that applies delegate-analyzer to selected fields only.

  analyzable-fields is invoked with the field name (the caller in this
  project passes a set of field-name strings). A truthy result sends the
  field through delegate-analyzer's tokenStream; any other field is handed
  to a KeywordTokenizer, which per the Lucene docs emits the entire input
  as a single token."
  [delegate-analyzer analyzable-fields]
  (proxy [Analyzer] []
    (tokenStream [field-name reader]
      (if-not (analyzable-fields field-name)
        ;; Field not selected: bypass the delegate entirely.
        (KeywordTokenizer. reader)
        (.tokenStream delegate-analyzer field-name reader)))))
(defn logging-analyzer
  "Wraps delegate-analyzer via AnalyzerUtil/getLoggingAnalyzer, passing
  System/out as the output stream and \"log\" as the log name."
  [delegate-analyzer]
  (let [sink     System/out
        log-name "log"]
    (AnalyzerUtil/getLoggingAnalyzer delegate-analyzer sink log-name)))