Added selective analyzer for selective stemming

master
Abhinav Sarkar 2011-06-01 09:27:22 +05:30
parent 2805b6b634
commit 48f25c29d4
3 changed files with 31 additions and 11 deletions

View File

@@ -8,4 +8,5 @@
[org.clojure/clojure-contrib "1.2.0"]
[org.apache.lucene/lucene-core "3.1.0"]
[org.apache.lucene/lucene-wordnet "3.1.0"]
[org.apache.lucene/lucene-analyzers "3.1.0"]
[org/pircbotx "1.3-SNAPSHOT"]])

View File

@@ -12,7 +12,11 @@
;; State holder for the chat log: an atom wrapping an initially empty vector.
(def *chat-log* (atom []))
;; First binding of *analyzer*: a bare standard analyzer. The def that follows
;; binds the same var again, so this value is replaced as soon as it loads.
(def *analyzer* (standard-analyzer))
;; Effective analyzer, built inside-out:
;;   standard analyzer -> stemmer wrapper -> selective wrapper restricted to
;;   the "message" field -> logging wrapper.
(def *analyzer*
(logging-analyzer
(selective-analyzer
(stemmer-analyzer (standard-analyzer))
#{"message"})))
;; Hit limit, set to 3.
;; NOTE(review): presumably passed as max-hits when searching — confirm at the call site.
(def *max-hits* 3)
@@ -35,17 +39,18 @@
(defn search-chat-log [index-searcher query-str max-hits analyzer]
(let [qp (query-parser :message analyzer)
[query filter] (filterify-query (parse-query qp query-str) #{"user"})
raw-query (parse-query qp query-str)
[query filter] (filterify-query raw-query #{"user"})
hits (search index-searcher query filter max-hits)]
(println ">>" (count hits) "hits for query:" query)
(println query)
(println filter)
(println "Query:" query)
(println "Filter:" filter)
(println ">>" (count hits) "hits for query:" query-str)
(map
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))]
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))
delta (floor (/ (- (System/currentTimeMillis) timestamp) 1000))]
(format
"[%s] %s: %s"
(fuzzy-relative-time
(floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
(fuzzy-relative-time delta)
(-> % :doc :user)
(-> % :doc :message)))
hits)))

View File

@@ -1,6 +1,7 @@
(ns irc-search-bot.lucene
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
[org.apache.lucene.store RAMDirectory FSDirectory]
[org.apache.lucene.analysis Analyzer KeywordTokenizer]
[org.apache.lucene.analysis.standard StandardAnalyzer]
[org.apache.lucene.util Version]
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
@@ -38,7 +39,10 @@
(.add filter-query clause))
(do
(.add new-query clause)))))
[new-query (QueryWrapperFilter. filter-query)])
[new-query
(if (.isEmpty (.clauses filter-query))
nil
(QueryWrapperFilter. filter-query))])
[query, nil]))
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
@@ -97,5 +101,15 @@
;; Builds a new StandardAnalyzer for the Lucene version held in
;; *lucene-version*. Takes no arguments; every call constructs a fresh instance.
(defn standard-analyzer []
  (new StandardAnalyzer *lucene-version*))
;; Wraps an analyzer with the Porter-stemmer analyzer from AnalyzerUtil.
;;
;; Arities:
;;   []                   - stems on top of a fresh (standard-analyzer).
;;   [delegate-analyzer]  - stems on top of the supplied analyzer.
;;
;; Both call shapes live in one defn: two separate defns of the same name
;; would leave only the last one in effect, silently dropping the
;; zero-argument form.
(defn stemmer-analyzer
  ([]
    (stemmer-analyzer (standard-analyzer)))
  ([delegate-analyzer]
    (AnalyzerUtil/getPorterStemmerAnalyzer delegate-analyzer)))
;; Returns an Analyzer proxy that applies delegate-analyzer only to chosen
;; fields.
;;
;;   delegate-analyzer  - analyzer whose .tokenStream is used for chosen fields.
;;   analyzable-fields  - invoked as a function on the field name (a set of
;;                        field-name strings works); a truthy result selects
;;                        the delegate.
;;
;; For any other field the reader is handed to a KeywordTokenizer instead of
;; the delegate.
(defn selective-analyzer [delegate-analyzer analyzable-fields]
  (proxy [Analyzer] []
    (tokenStream [field-name reader]
      (if-not (analyzable-fields field-name)
        (KeywordTokenizer. reader)
        (.tokenStream delegate-analyzer field-name reader)))))
;; Wraps delegate-analyzer using AnalyzerUtil/getLoggingAnalyzer, passing
;; System/out and the string "log" as the remaining two arguments.
;; NOTE(review): per the Lucene API these appear to be the output stream and
;; the log name — confirm against the AnalyzerUtil documentation.
(defn logging-analyzer [delegate-analyzer]
  (let [sink  System/out
        label "log"]
    (AnalyzerUtil/getLoggingAnalyzer delegate-analyzer sink label)))