Added query filter support, fuzzy timestamps.
parent
18f696a750
commit
2805b6b634
|
@ -3,14 +3,16 @@
|
|||
[java.util Date])
|
||||
(:use [irc-search-bot.bot]
|
||||
[irc-search-bot.lucene]
|
||||
[irc-search-bot.util]
|
||||
[clojure.string :only (trim join)]
|
||||
[clojure.java.io :only (reader as-file)]))
|
||||
[clojure.java.io :only (reader as-file)]
|
||||
[clojure.contrib.math :only (floor)]))
|
||||
|
||||
(def ^{:doc "Value returned by (fs-directory \"index\"); presumably the on-disk Lucene index location (confirm against fs-directory)."}
  *index-dir*
  (fs-directory "index"))

(def ^{:doc "Atom wrapping a vector; the message handler conj-es [timestamp user msg] entries onto it."}
  *chat-log*
  (atom []))

(def ^{:doc "Analyzer instance obtained from (standard-analyzer)."}
  *analyzer*
  (standard-analyzer))

(def ^{:doc "Presumably the maximum number of hits reported per search (confirm at call sites)."}
  *max-hits*
  3)
|
||||
|
||||
|
@ -30,17 +32,20 @@
|
|||
(field :timestamp (str timestamp) :index :not-analyzed)
|
||||
(field :user user :index :not-analyzed)
|
||||
(field :message message))))))
|
||||
|
||||
(defn search-chat-log
  "Parses `query-str` against the :message field using `analyzer`, splits the
  parsed query with `filterify-query` (treating the \"user\" field as a filter
  field), runs the search with at most `max-hits` results, and returns a lazy
  seq of strings shaped like \"[<relative time>] <user>: <message>\".

  Each hit is expected to carry a :doc map with :timestamp (a string that
  parses as a long, compared against System/currentTimeMillis), :user and
  :message. The split query and filter are printed to stdout."
  [index-searcher query-str max-hits analyzer]
  (let [parser (query-parser :message analyzer)
        parsed (parse-query parser query-str)
        [main-query user-filter] (filterify-query parsed #{"user"})
        hits (search index-searcher main-query user-filter max-hits)]
    (println main-query)
    (println user-filter)
    (for [hit hits]
      (let [posted-at (Long/parseLong (:timestamp (:doc hit)))
            ;; elapsed whole seconds between now and the message timestamp
            age-secs (floor (/ (- (System/currentTimeMillis) posted-at) 1000))]
        (format "[%s] %s: %s"
                (fuzzy-relative-time age-secs)
                (:user (:doc hit))
                (:message (:doc hit)))))))
|
||||
|
@ -81,10 +86,11 @@
|
|||
(send-message bot channel "No results found")
|
||||
(doseq [result results]
|
||||
(send-message bot channel result)))))
|
||||
(when-not (and (.startsWith msg "!") (not (*ignored-users* user)))
|
||||
(when (and (not (.startsWith msg "!")) (not (*ignored-users* user)))
|
||||
(swap! *chat-log* conj [timestamp user msg])))))
|
||||
|
||||
(defn run-bot
  "Creates a bot named `bot-name`, connects it to `channel` on `server`,
  calls `schedule-index-chat-log`, and returns the bot."
  [bot-name server channel]
  (let [irc-bot (doto (make-bot bot-name)
                  (connect-bot server channel))]
    (schedule-index-chat-log)
    irc-bot))
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
[org.apache.lucene.analysis.standard StandardAnalyzer]
|
||||
[org.apache.lucene.util Version]
|
||||
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
|
||||
[org.apache.lucene.search IndexSearcher ScoreDoc Query]
|
||||
[org.apache.lucene.search
|
||||
IndexSearcher ScoreDoc Query BooleanQuery TermQuery BooleanClause$Occur
|
||||
Filter QueryWrapperFilter]
|
||||
[org.apache.lucene.queryParser QueryParser]
|
||||
[org.apache.lucene.wordnet AnalyzerUtil])
|
||||
(:use [clojure.java.io :only (as-file)]))
|
||||
|
@ -23,9 +25,27 @@
|
|||
(defn parse-query
  "Returns the result of calling `parse` on `query-parser` with `query-text`."
  [^QueryParser query-parser query-text]
  (. query-parser parse query-text))
|
||||
|
||||
(defn search [^IndexSearcher index-searcher ^Query query ^Integer max-hits]
|
||||
(defn filterify-query
  "Splits a parsed query into a pair [main-query filter].

  When `query` is a BooleanQuery, every non-prohibited TermQuery clause whose
  field name is in `must-fields` (a set of field-name strings, used as a
  predicate) is moved into a separate BooleanQuery as a MUST clause; all
  other clauses are copied into a new main BooleanQuery unchanged.

  Returns:
    [new-query filter] for a BooleanQuery, where `filter` is a
      QueryWrapperFilter over the extracted clauses, or nil when no clause
      was extracted;
    [query nil] for any other query type.

  Note: if every clause is extracted, the returned main query has no clauses."
  [^Query query must-fields]
  (if (instance? BooleanQuery query)
    (let [new-query (BooleanQuery.)
          filter-query (BooleanQuery.)]
      (doseq [clause (.clauses ^BooleanQuery query)]
        (let [subquery (.getQuery clause)]
          (if (and (instance? TermQuery subquery)
                   ;; A prohibited clause (e.g. -user:x) must stay prohibited;
                   ;; promoting it to MUST would invert its meaning.
                   (not (.isProhibited clause))
                   (must-fields (.field (.getTerm ^TermQuery subquery))))
            ;; Add a fresh MUST clause rather than mutating the clause that
            ;; still belongs to the caller's query.
            (.add filter-query subquery BooleanClause$Occur/MUST)
            (.add new-query clause))))
      ;; A filter wrapping an empty BooleanQuery matches no documents, which
      ;; would suppress every hit; report "no filter" instead.
      [new-query
       (when (seq (.clauses filter-query))
         (QueryWrapperFilter. filter-query))])
    [query nil]))
|
||||
|
||||
(defn search [^IndexSearcher index-searcher ^Query query ^Filter filter ^Integer max-hits]
|
||||
(->>
|
||||
(.search index-searcher query max-hits)
|
||||
(if (nil? filter)
|
||||
(.search index-searcher query max-hits)
|
||||
(.search index-searcher query filter max-hits))
|
||||
(.scoreDocs)
|
||||
seq
|
||||
(map
|
||||
|
@ -78,4 +98,4 @@
|
|||
(StandardAnalyzer. *lucene-version*))
|
||||
|
||||
(defn stemmer-analyzer
  "Returns the analyzer produced by AnalyzerUtil/getPorterStemmerAnalyzer
  when given a fresh (standard-analyzer)."
  []
  (let [base-analyzer (standard-analyzer)]
    (AnalyzerUtil/getPorterStemmerAnalyzer base-analyzer)))
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
(ns irc-search-bot.util
|
||||
(:use [clojure.contrib.math :only (floor)]))
|
||||
|
||||
;; Unit sizes in seconds. A month is taken as 30 days and a year as 365 days.
(let [sec    1
      minute (* 60 sec)
      hour   (* 60 minute)
      day    (* 24 hour)
      month  (* 30 day)
      year   (* 365 day)]
  (defn fuzzy-relative-time
    "Renders an elapsed time as a rough English phrase.

    `delta` is the number of seconds that have elapsed (negative means the
    moment lies in the future). Returns a string such as \"12 seconds ago\",
    \"a minute ago\", \"3 hours ago\", \"yesterday\" or \"2 years ago\";
    negative deltas yield \"not yet\"."
    [delta]
    (cond
      (< delta 0)
      "not yet"

      (< delta minute)
      (if (== delta 1) "one second ago" (str delta " seconds ago"))

      (< delta (* 2 minute))
      "a minute ago"

      (< delta (* 45 minute))
      (str (quot delta minute) " minutes ago")

      ;; Anything short of two full hours reads as \"an hour ago\"; stopping
      ;; at 90 minutes would let the next branch produce \"1 hours ago\".
      (< delta (* 2 hour))
      "an hour ago"

      (< delta (* 24 hour))
      (str (quot delta hour) " hours ago")

      (< delta (* 48 hour))
      "yesterday"

      (< delta (* 30 day))
      (str (quot delta day) " days ago")

      (< delta (* 12 month))
      (let [months (quot delta month)]
        (if (<= months 1) "one month ago" (str months " months ago")))

      :else
      ;; 12 thirty-day months are 360 days, so `years` can be 0 here;
      ;; the <= 1 test folds that case into \"one year ago\".
      (let [years (quot delta year)]
        (if (<= years 1) "one year ago" (str years " years ago"))))))
|
Loading…
Reference in New Issue