From 2805b6b634563f222c63ab0c97b02cff335704fd Mon Sep 17 00:00:00 2001
From: Abhinav Sarkar
Date: Tue, 31 May 2011 21:43:25 +0530
Subject: [PATCH] Added query filter support, fuzzy timestamps.

---
Note: this copy of the patch had its line breaks and indentation collapsed.
They are reconstructed here, so context-line whitespace is a best guess;
apply with `git apply --ignore-whitespace`. The blob ids on the `index`
lines predate the review fixes made to the added lines.

 src/irc_search_bot/core.clj   | 37 +++++++++++++++++++++++++------------
 src/irc_search_bot/lucene.clj | 43 +++++++++++++++++++++++++++++++++++++++----
 src/irc_search_bot/util.clj   | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 16 deletions(-)
 create mode 100644 src/irc_search_bot/util.clj

diff --git a/src/irc_search_bot/core.clj b/src/irc_search_bot/core.clj
index adcc924..0ab85d7 100644
--- a/src/irc_search_bot/core.clj
+++ b/src/irc_search_bot/core.clj
@@ -3,14 +3,16 @@
            [java.util Date])
   (:use [irc-search-bot.bot]
         [irc-search-bot.lucene]
+        [irc-search-bot.util]
         [clojure.string :only (trim join)]
-        [clojure.java.io :only (reader as-file)]))
+        [clojure.java.io :only (reader as-file)]
+        [clojure.contrib.math :only (floor)]))
 
 (def *index-dir* (fs-directory "index"))
 
 (def *chat-log* (atom []))
 
-(def *analyzer* (stemmer-analyzer))
+(def *analyzer* (standard-analyzer))
 
 (def *max-hits* 3)
 
@@ -30,17 +32,27 @@
               (field :timestamp (str timestamp) :index :not-analyzed)
               (field :user user :index :not-analyzed)
               (field :message message))))))
-
-(defn search-chat-log [index-searcher query max-hits analyzer]
+
+(defn search-chat-log
+  "Parses query-str against the :message field, splits clauses on the user
+  field off into a filter, and searches. Returns a lazy seq of formatted
+  result strings, one per hit, each carrying a fuzzy relative timestamp,
+  the user and the message."
+  [index-searcher query-str max-hits analyzer]
   (let [qp (query-parser :message analyzer)
-        q (parse-query qp query)
-        hits (search index-searcher q max-hits)]
+        ;; Named query-filter rather than filter so that clojure.core/filter
+        ;; is not shadowed inside this function.
+        [query query-filter] (filterify-query (parse-query qp query-str) #{"user"})
+        hits (search index-searcher query query-filter max-hits)]
-    (println ">>" (count hits) "hits for query:" query)
+    (println ">>" (count hits) "hits for query:" query-str)
     (map
-      #(let [timestamp (-> % :doc :timestamp (Long/parseLong) (Date.))]
+      ;; The stored :timestamp is subtracted from currentTimeMillis and the
+      ;; difference is reduced to whole seconds for fuzzy-relative-time.
+      #(let [timestamp (-> % :doc :timestamp (Long/parseLong))]
         (format
-          "[%tI:%tM %tp] %s: %s"
-          timestamp timestamp timestamp
+          "[%s] %s: %s"
+          (fuzzy-relative-time
+            (floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
           (-> % :doc :user)
           (-> % :doc :message)))
       hits)))
@@ -81,10 +93,11 @@
           (send-message bot channel "No results found")
           (doseq [result results]
             (send-message bot channel result)))))
-      (when-not (and (.startsWith msg "!") (not (*ignored-users* user)))
+      (when (and (not (.startsWith msg "!")) (not (*ignored-users* user)))
         (swap! *chat-log* conj [timestamp user msg])))))
 
 (defn run-bot [bot-name server channel]
   (let [bot (make-bot bot-name)]
     (connect-bot bot server channel)
-    (schedule-index-chat-log)))
\ No newline at end of file
+    (schedule-index-chat-log)
+    bot))
diff --git a/src/irc_search_bot/lucene.clj b/src/irc_search_bot/lucene.clj
index b24139c..de98864 100644
--- a/src/irc_search_bot/lucene.clj
+++ b/src/irc_search_bot/lucene.clj
@@ -4,7 +4,9 @@
     [org.apache.lucene.analysis.standard StandardAnalyzer]
     [org.apache.lucene.util Version]
     [org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
-    [org.apache.lucene.search IndexSearcher ScoreDoc Query]
+    [org.apache.lucene.search
+      IndexSearcher ScoreDoc Query BooleanQuery TermQuery BooleanClause$Occur
+      Filter QueryWrapperFilter]
     [org.apache.lucene.queryParser QueryParser]
     [org.apache.lucene.wordnet AnalyzerUtil])
   (:use [clojure.java.io :only (as-file)]))
@@ -23,9 +25,42 @@
 (defn parse-query [^QueryParser query-parser query-text]
   (.parse query-parser query-text))
 
-(defn search [^IndexSearcher index-searcher ^Query query ^Integer max-hits]
+(defn filterify-query
+  "Splits a parsed query into a [query filter] pair. When query is a
+  BooleanQuery, each TermQuery clause whose field is in must-fields is
+  set to MUST and moved into a QueryWrapperFilter; all other clauses
+  form the returned query. The filter is nil when there is nothing to
+  filter on. Note that the moved clauses are mutated in place."
+  [^Query query must-fields]
+  (if (instance? BooleanQuery query)
+    (let [new-query (BooleanQuery.)
+          filter-query (BooleanQuery.)]
+      (doseq [clause (.clauses query)]
+        (let [subquery (.getQuery clause)]
+          (if (and (instance? TermQuery subquery)
+                   (must-fields (.field (.getTerm subquery))))
+            (do
+              (.setOccur clause BooleanClause$Occur/MUST)
+              (.add filter-query clause))
+            (.add new-query clause))))
+      (cond
+        ;; No clause matched must-fields: a filter wrapping an empty
+        ;; BooleanQuery would reject every document, so do not filter.
+        (empty? (.clauses filter-query)) [query nil]
+        ;; Only must-fields clauses were given: an empty main query
+        ;; matches nothing, so run the filter clauses as the query.
+        (empty? (.clauses new-query)) [filter-query nil]
+        :else [new-query (QueryWrapperFilter. filter-query)]))
+    [query nil]))
+
+(defn search
+  "Searches index-searcher with query, asking for at most max-hits top
+  documents. A nil filter selects the unfiltered search overload."
+  [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
   (->>
-    (.search index-searcher query max-hits)
+    (if (nil? filter)
+      (.search index-searcher query max-hits)
+      (.search index-searcher query filter max-hits))
     (.scoreDocs)
     seq
     (map
@@ -78,4 +113,4 @@
   (StandardAnalyzer. *lucene-version*))
 
 (defn stemmer-analyzer []
-  (AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))
\ No newline at end of file
+  (AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))
diff --git a/src/irc_search_bot/util.clj b/src/irc_search_bot/util.clj
new file mode 100644
index 0000000..60b35a5
--- /dev/null
+++ b/src/irc_search_bot/util.clj
@@ -0,0 +1,40 @@
+(ns irc-search-bot.util
+  (:use [clojure.contrib.math :only (floor)]))
+
+;; All durations below are expressed in seconds.
+(let [second 1
+      minute (* 60 second)
+      hour (* 60 minute)
+      day (* 24 hour)
+      month (* 30 day)
+      year (* 365 day)]
+  (defn fuzzy-relative-time
+    "Turns delta, an elapsed time in seconds, into a rough English phrase
+    such as 'a minute ago' or '3 days ago'. A negative delta yields
+    'not yet'."
+    [delta]
+    (cond
+      (< delta 0)
+      "not yet"
+      (< delta (* 1 minute))
+      (if (== delta 1) "one second ago" (str delta " seconds ago"))
+      (< delta (* 2 minute))
+      "a minute ago"
+      (< delta (* 45 minute))
+      (str (floor (/ delta minute)) " minutes ago")
+      ;; Up to two hours rather than 90 minutes: the next branch floors to
+      ;; whole hours and would otherwise print "1 hours ago" for 90-119 min.
+      (< delta (* 2 hour))
+      "an hour ago"
+      (< delta (* 24 hour))
+      (str (floor (/ delta hour)) " hours ago")
+      (< delta (* 48 hour))
+      "yesterday"
+      (< delta (* 30 day))
+      (str (floor (/ delta day)) " days ago")
+      (< delta (* 12 month))
+      (let [months (floor (/ delta month))]
+        (if (<= months 1) "one month ago" (str months " months ago")))
+      :else
+      (let [years (floor (/ delta year))]
+        (if (<= years 1) "one year ago" (str years " years ago"))))))