diff --git a/project.clj b/project.clj index f4b2091..04c4828 100644 --- a/project.clj +++ b/project.clj @@ -8,4 +8,5 @@ [org.clojure/clojure-contrib "1.2.0"] [org.apache.lucene/lucene-core "3.1.0"] [org.apache.lucene/lucene-wordnet "3.1.0"] + [org.apache.lucene/lucene-analyzers "3.1.0"] [org/pircbotx "1.3-SNAPSHOT"]]) diff --git a/src/irc_search_bot/core.clj b/src/irc_search_bot/core.clj index 0ab85d7..2606162 100644 --- a/src/irc_search_bot/core.clj +++ b/src/irc_search_bot/core.clj @@ -12,7 +12,11 @@ (def *chat-log* (atom [])) -(def *analyzer* (standard-analyzer)) +(def *analyzer* + (logging-analyzer + (selective-analyzer + (stemmer-analyzer (standard-analyzer)) + #{"message"}))) (def *max-hits* 3) @@ -35,17 +39,18 @@ (defn search-chat-log [index-searcher query-str max-hits analyzer] (let [qp (query-parser :message analyzer) - [query filter] (filterify-query (parse-query qp query-str) #{"user"}) + raw-query (parse-query qp query-str) + [query filter] (filterify-query raw-query #{"user"}) hits (search index-searcher query filter max-hits)] - (println ">>" (count hits) "hits for query:" query) - (println query) - (println filter) + (println "Query:" query) + (println "Filter:" filter) + (println ">>" (count hits) "hits for query:" query-str) (map - #(let [timestamp (-> % :doc :timestamp (Long/parseLong))] + #(let [timestamp (-> % :doc :timestamp (Long/parseLong)) + delta (floor (/ (- (System/currentTimeMillis) timestamp) 1000))] (format "[%s] %s: %s" - (fuzzy-relative-time - (floor (/ (- (System/currentTimeMillis) timestamp) 1000))) + (fuzzy-relative-time delta) (-> % :doc :user) (-> % :doc :message))) hits))) diff --git a/src/irc_search_bot/lucene.clj b/src/irc_search_bot/lucene.clj index de98864..9c98b74 100644 --- a/src/irc_search_bot/lucene.clj +++ b/src/irc_search_bot/lucene.clj @@ -1,6 +1,7 @@ (ns irc-search-bot.lucene (:import [org.apache.lucene.document Document Field Field$Store Field$Index] [org.apache.lucene.store RAMDirectory FSDirectory] + [org.apache.lucene.analysis Analyzer KeywordTokenizer] [org.apache.lucene.analysis.standard StandardAnalyzer] [org.apache.lucene.util Version] [org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader] @@ -38,7 +39,10 @@ (.add filter-query clause)) (do (.add new-query clause))))) - [new-query (QueryWrapperFilter. filter-query)]) + [new-query + (if (.isEmpty (.clauses filter-query)) + nil + (QueryWrapperFilter. filter-query))]) [query, nil])) (defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits] @@ -97,5 +101,15 @@ (defn standard-analyzer [] (StandardAnalyzer. *lucene-version*)) -(defn stemmer-analyzer [] - (AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer))) +(defn stemmer-analyzer [delegate-analyzer] + (AnalyzerUtil/getPorterStemmerAnalyzer delegate-analyzer)) + +(defn selective-analyzer [delegate-analyzer analyzable-fields] + (proxy [Analyzer] [] + (tokenStream [field rdr] + (if (analyzable-fields field) + (.tokenStream delegate-analyzer field rdr) + (KeywordTokenizer. rdr))))) + +(defn logging-analyzer [delegate-analyzer] + (AnalyzerUtil/getLoggingAnalyzer delegate-analyzer System/out "log"))