Added query filter support, fuzzy timestamps.

master
Abhinav Sarkar 2011-05-31 21:43:25 +05:30
parent 18f696a750
commit 2805b6b634
3 changed files with 74 additions and 15 deletions

View File

@ -3,14 +3,16 @@
[java.util Date])
(:use [irc-search-bot.bot]
[irc-search-bot.lucene]
[irc-search-bot.util]
[clojure.string :only (trim join)]
[clojure.java.io :only (reader as-file)]))
[clojure.java.io :only (reader as-file)]
[clojure.contrib.math :only (floor)]))
(def *index-dir* (fs-directory "index"))
(def *chat-log* (atom []))
(def *analyzer* (stemmer-analyzer))
(def *analyzer* (standard-analyzer))
(def *max-hits* 3)
@ -30,17 +32,20 @@
(field :timestamp (str timestamp) :index :not-analyzed)
(field :user user :index :not-analyzed)
(field :message message))))))
(defn search-chat-log [index-searcher query max-hits analyzer]
(defn search-chat-log [index-searcher query-str max-hits analyzer]
(let [qp (query-parser :message analyzer)
q (parse-query qp query)
hits (search index-searcher q max-hits)]
[query filter] (filterify-query (parse-query qp query-str) #{"user"})
hits (search index-searcher query filter max-hits)]
(println ">>" (count hits) "hits for query:" query)
(println query)
(println filter)
(map
#(let [timestamp (-> % :doc :timestamp (Long/parseLong) (Date.))]
#(let [timestamp (-> % :doc :timestamp (Long/parseLong))]
(format
"[%tI:%tM %tp] %s: %s"
timestamp timestamp timestamp
"[%s] %s: %s"
(fuzzy-relative-time
(floor (/ (- (System/currentTimeMillis) timestamp) 1000)))
(-> % :doc :user)
(-> % :doc :message)))
hits)))
@ -81,10 +86,11 @@
(send-message bot channel "No results found")
(doseq [result results]
(send-message bot channel result)))))
(when-not (and (.startsWith msg "!") (not (*ignored-users* user)))
(when (and (not (.startsWith msg "!")) (not (*ignored-users* user)))
(swap! *chat-log* conj [timestamp user msg])))))
(defn run-bot [bot-name server channel]
(let [bot (make-bot bot-name)]
(connect-bot bot server channel)
(schedule-index-chat-log)))
(schedule-index-chat-log)
bot))

View File

@ -4,7 +4,9 @@
[org.apache.lucene.analysis.standard StandardAnalyzer]
[org.apache.lucene.util Version]
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
[org.apache.lucene.search IndexSearcher ScoreDoc Query]
[org.apache.lucene.search
IndexSearcher ScoreDoc Query BooleanQuery TermQuery BooleanClause$Occur
Filter QueryWrapperFilter]
[org.apache.lucene.queryParser QueryParser]
[org.apache.lucene.wordnet AnalyzerUtil])
(:use [clojure.java.io :only (as-file)]))
@ -23,9 +25,27 @@
(defn parse-query [^QueryParser query-parser query-text]
(.parse query-parser query-text))
(defn search [^IndexSearcher index-searcher ^Query query ^Integer max-hits]
(defn filterify-query
  "Splits a parsed query into a scoring query and a filter.

  When `query` is a BooleanQuery, every clause whose sub-query is a TermQuery
  on a field contained in `must-fields` (called as a predicate on the field
  name, e.g. a set of strings) is moved into a separate BooleanQuery with
  occurrence MUST and wrapped in a QueryWrapperFilter. All remaining clauses
  are copied unchanged into a new BooleanQuery.

  Returns a vector [query filter]. `filter` is nil when `query` is not a
  BooleanQuery, or when no clause matched `must-fields`."
  [^Query query must-fields]
  (if (instance? BooleanQuery query)
    (let [^BooleanQuery source query
          new-query (BooleanQuery.)
          filter-query (BooleanQuery.)]
      (doseq [clause (.clauses source)]
        (let [subquery (.getQuery clause)]
          (if (and (instance? TermQuery subquery)
                   (must-fields (.field (.getTerm ^TermQuery subquery))))
            ;; Add the sub-query under a fresh MUST clause rather than
            ;; mutating the clause that still belongs to the caller's query.
            (.add filter-query ^Query subquery BooleanClause$Occur/MUST)
            (.add new-query clause))))
      ;; An empty BooleanQuery matches no documents, so wrapping it in a
      ;; filter would silently discard every hit; signal "no filter" instead.
      [new-query
       (when (seq (.clauses filter-query))
         (QueryWrapperFilter. filter-query))])
    [query nil]))
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
(->>
(.search index-searcher query max-hits)
(if (nil? filter)
(.search index-searcher query max-hits)
(.search index-searcher query filter max-hits))
(.scoreDocs)
seq
(map
@ -78,4 +98,4 @@
(StandardAnalyzer. *lucene-version*))
(defn stemmer-analyzer []
(AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))
(AnalyzerUtil/getPorterStemmerAnalyzer (standard-analyzer)))

View File

@ -0,0 +1,33 @@
(ns irc-search-bot.util
(:use [clojure.contrib.math :only (floor)]))
;; Unit sizes expressed in seconds; months and years are approximations
;; (30 and 365 days respectively).
(let [second 1
      minute (* 60 second)
      hour   (* 60 minute)
      day    (* 24 hour)
      month  (* 30 day)
      year   (* 365 day)]
  (defn fuzzy-relative-time
    "Renders `delta`, an elapsed time in seconds, as an approximate
    human-readable phrase such as \"5 minutes ago\" or \"yesterday\".
    A negative `delta` yields \"not yet\"."
    [delta]
    (cond
      (< delta 0)
      "not yet"

      (< delta (* 1 minute))
      (if (== delta 1) "one second ago" (str delta " seconds ago"))

      (< delta (* 2 minute))
      "a minute ago"

      (< delta (* 45 minute))
      (str (quot delta minute) " minutes ago")

      (< delta (* 90 minute))
      "an hour ago"

      (< delta (* 24 hour))
      ;; Deltas from 90 to 119 minutes still round down to a single hour;
      ;; render them as "an hour ago" instead of "1 hours ago".
      (let [hours (quot delta hour)]
        (if (<= hours 1) "an hour ago" (str hours " hours ago")))

      (< delta (* 48 hour))
      "yesterday"

      (< delta (* 30 day))
      (str (quot delta day) " days ago")

      (< delta (* 12 month))
      (let [months (quot delta month)]
        (if (<= months 1) "one month ago" (str months " months ago")))

      :else
      ;; 12 "months" is only 360 days, so years can be 0 here; the <= guard
      ;; folds that case into "one year ago".
      (let [years (quot delta year)]
        (if (<= years 1) "one year ago" (str years " years ago"))))))