First working commit
This commit is contained in:
commit
18f696a750
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
pom.xml
|
||||
*~
|
||||
*jar
|
||||
/lib/
|
||||
/classes/
|
||||
.lein-deps-sum
|
||||
ignored_users
|
||||
/index/
|
11
project.clj
Normal file
11
project.clj
Normal file
@ -0,0 +1,11 @@
|
||||
;; Leiningen project definition for the IRC search bot.
(defproject irc-search-bot "1.0.0-SNAPSHOT"
  :description "An IRC bot to search the IRC chat history"

  ;; Extra repository that serves snapshot artifacts only.
  ;; NOTE(review): presumably this is where the org/pircbotx SNAPSHOT
  ;; dependency below is resolved from -- confirm.
  :repositories {"general-maven-repo-snapshot"
                 {:url "http://general-maven-repo.googlecode.com/svn/maven2/snapshots"
                  :releases false
                  :snapshots true}}

  :dependencies [[org.clojure/clojure "1.2.1"]
                 [org.clojure/clojure-contrib "1.2.0"]
                 ;; Full-text indexing and search.
                 [org.apache.lucene/lucene-core "3.1.0"]
                 [org.apache.lucene/lucene-wordnet "3.1.0"]
                 ;; IRC client library.
                 [org/pircbotx "1.3-SNAPSHOT"]])
|
58
src/irc_search_bot/bot.clj
Normal file
58
src/irc_search_bot/bot.clj
Normal file
@ -0,0 +1,58 @@
|
||||
(ns irc-search-bot.bot
|
||||
(:import [org.pircbotx PircBotX Channel]
|
||||
[org.pircbotx.hooks Event Listener])
|
||||
(:use [clojure.string :only [join lower-case]]))
|
||||
|
||||
(defn spy
  "Debugging aid: prints `o` with println and returns it unchanged, so it can
  be dropped into the middle of a threading form."
  [o]
  (println o)
  o)
|
||||
|
||||
(defmulti event-listener
  "Handles an IRC event for `bot`.

  Dispatches on a keyword derived from the event's simple class name: the name
  is split into its capitalised words, the last word is dropped, and the rest
  are lower-cased and joined with \"-\". For example an instance of a class
  named MessageEvent dispatches on :message."
  (fn [bot event]
    (let [simple-name (.getSimpleName (class event))
          ;; Each match is [whole-match group]; only the whole match is used.
          words       (map first (re-seq #"([A-Z][^A-Z]*)" simple-name))]
      (keyword (join "-" (map lower-case (butlast words)))))))
|
||||
|
||||
(defn make-bot
  "Creates a PircBotX instance with the given nick `name` and registers a
  single Listener that forwards every event to the `event-listener`
  multimethod. Exceptions thrown by a handler are caught and their stack trace
  printed, so they do not propagate out of onEvent. Returns the bot."
  [name]
  (let [bot      (PircBotX.)
        listener (proxy [Listener] []
                   (onEvent [^Event event]
                     (try
                       (event-listener (.getBot event) event)
                       (catch Exception ex
                         (.printStackTrace ex)))))]
    (.setName bot name)
    (.addListener (.getListenerManager bot) listener)
    bot))
|
||||
|
||||
;; Fallback handler: events without a dedicated method are ignored.
(defmethod event-listener :default
  [bot ev]
  nil)
|
||||
|
||||
(defn connect-bot
  "Connects `bot` to `server`, then joins `channel`. Returns the bot."
  [^PircBotX bot server channel]
  (.connect bot server)
  (.joinChannel bot channel)
  bot)
|
||||
|
||||
(defn disconnect-bot
  "Disconnects `bot` from its server. Returns the bot."
  [^PircBotX bot]
  (.disconnect bot)
  bot)
|
||||
|
||||
(defn join-channel
  "Makes `bot` join `channel`, which may be either a Channel object (its name
  is used) or any other value, which is passed to joinChannel as-is.
  Returns the bot."
  [bot channel]
  (let [channel-name (if (instance? Channel channel)
                       (.getName channel)
                       channel)]
    (.joinChannel bot channel-name)
    bot))
|
||||
|
||||
(defn send-message
  "Sends `message` to `channel` via `bot`. `channel` may be a Channel object or
  a value that is resolved to one with the bot's getChannel. Returns the bot."
  [bot channel message]
  (let [target (if (instance? Channel channel)
                 channel
                 (.getChannel bot channel))]
    (.sendMessage bot target message)
    bot))
|
90
src/irc_search_bot/core.clj
Normal file
90
src/irc_search_bot/core.clj
Normal file
@ -0,0 +1,90 @@
|
||||
(ns irc-search-bot.core
|
||||
(:import [java.util.concurrent Executors TimeUnit]
|
||||
[java.util Date])
|
||||
(:use [irc-search-bot.bot]
|
||||
[irc-search-bot.lucene]
|
||||
[clojure.string :only (trim join)]
|
||||
[clojure.java.io :only (reader as-file)]))
|
||||
|
||||
;; Directory handle for the chat index, opened at the relative path "index".
(def *index-dir* (fs-directory "index"))
|
||||
|
||||
;; Buffer of chat entries that have not been indexed yet. The :message handler
;; appends [timestamp user msg] vectors; the scheduled indexing task empties it.
(def *chat-log* (atom []))
|
||||
|
||||
;; Analyzer shared by the index writer and the query parser.
(def *analyzer* (stemmer-analyzer))
|
||||
|
||||
;; Maximum number of hits requested for each "!q" search.
(def *max-hits* 3)
|
||||
|
||||
;; Set of nicks whose messages must not be logged, loaded once at namespace
;; load time from the file "ignored_users" (one nick per line) in the working
;; directory. Empty when the file does not exist.
;;
;; Each line is trimmed and blank lines are dropped: a nick followed by stray
;; whitespace would otherwise never match the nick reported by the IRC event,
;; and an empty line would add a useless "" entry.
(def *ignored-users*
  (let [^java.io.File users-file (as-file "ignored_users")]
    (if (.exists users-file)
      (with-open [rdr (reader users-file)]
        ;; `into` is eager, so the file is fully read before it is closed.
        (into #{} (remove empty? (map trim (line-seq rdr)))))
      #{})))
|
||||
|
||||
(defn index-chat-log
  "Adds every entry of `chat-log` to the index through `index-writer`.

  Each entry is a [timestamp user message] triple. The entry is echoed to
  standard output and then stored as a document with three fields:
  :timestamp (stringified) and :user are indexed without analysis, :message
  uses the default field options. Returns nil."
  [index-writer chat-log]
  (doseq [[timestamp user message] chat-log]
    (println (format "[%tr] %s: %s" (Date. timestamp) user message))
    (let [entry (document
                  (field :timestamp (str timestamp) :index :not-analyzed)
                  (field :user user :index :not-analyzed)
                  (field :message message))]
      (add-document index-writer entry))))
|
||||
|
||||
(defn search-chat-log
  "Searches the :message field of the index for `query` (parsed with
  `analyzer`) and returns at most `max-hits` results as display strings of the
  form \"[hh:mm am/pm] user: message\". The hit count is printed to standard
  output."
  [index-searcher query max-hits analyzer]
  (let [parsed (parse-query (query-parser :message analyzer) query)
        hits   (search index-searcher parsed max-hits)
        render (fn [hit]
                 (let [doc     (:doc hit)
                       ;; :timestamp was stored as a stringified long.
                       sent-at (Date. (Long/parseLong (:timestamp doc)))]
                   (format "[%tI:%tM %tp] %s: %s"
                           sent-at sent-at sent-at
                           (:user doc)
                           (:message doc))))]
    (println ">>" (count hits) "hits for query:" query)
    (map render hits)))
|
||||
|
||||
(defn schedule-index-chat-log
  "Starts a single-threaded scheduler that, every 10 seconds (after an initial
  10 second delay), moves the buffered entries of *chat-log* into the Lucene
  index. Returns the ScheduledFuture produced by scheduleWithFixedDelay.

  The buffer is drained with a compare-and-set loop so that taking the pending
  entries and emptying the atom happen as one atomic step. A separate deref
  followed by reset! would silently drop any message appended by the IRC
  thread between the two calls.

  Exceptions raised while indexing are printed and swallowed so that one
  failure does not cancel the subsequent scheduled runs."
  []
  (let [executor (Executors/newSingleThreadScheduledExecutor)
        drain!   (fn []
                   (loop []
                     (let [pending @*chat-log*]
                       (if (compare-and-set! *chat-log* pending [])
                         pending
                         (recur)))))]
    (.scheduleWithFixedDelay
      executor
      (fn []
        (try
          ;; The writer is opened before draining, so a failure to open the
          ;; index leaves the buffered messages in place for the next run.
          (with-open [iw (index-writer *index-dir* *analyzer*)]
            (index-chat-log iw (drain!)))
          (catch Exception e
            (.printStackTrace e))))
      10 10 TimeUnit/SECONDS)))
|
||||
|
||||
;; On disconnect: reconnect to the server the bot reports via getServer, then
;; join every channel name the bot reports via getChannelNames.
;; NOTE(review): the channel names are read after reconnecting; whether the bot
;; still remembers its channels at that point is not visible here -- confirm.
(defmethod event-listener :disconnect
  [bot ev]
  (.connect bot (.getServer bot))
  (doseq [channel-name (.getChannelNames bot)]
    (.joinChannel bot channel-name)))
|
||||
|
||||
;; On a kick event: (re)join the channel the event refers to.
;; NOTE(review): this runs for every kick event, without checking who was
;; kicked -- confirm that is intended.
(defmethod event-listener :kick
  [bot ev]
  (let [kicked-from (.getChannel ev)]
    (join-channel bot kicked-from)))
|
||||
|
||||
;; Handles a channel message.
;;
;; * A message starting with "!q" is a search request: the rest of the message
;;   is used as the query and up to *max-hits* results (or "No results found")
;;   are sent back to the channel.
;; * Any other message is appended to *chat-log* as [timestamp user msg],
;;   UNLESS it is a bot command (starts with "!") or its author is listed in
;;   *ignored-users*.
;;
;; The previous condition, (when-not (and starts-with-! (not ignored))), was
;; inverted: it logged every message from ignored users and skipped only the
;; commands of non-ignored users.
(defmethod event-listener :message
  [bot ev]
  (let [msg       (trim (.getMessage ev))
        user      (.. ev getUser getNick)
        timestamp (.getTimestamp ev)
        channel   (.getChannel ev)]
    (cond
      (.startsWith msg "!q")
      (with-open [is (index-searcher *index-dir*)]
        (let [results (search-chat-log is (trim (subs msg 2)) *max-hits* *analyzer*)]
          (if (zero? (count results))
            (send-message bot channel "No results found")
            (doseq [result results]
              (send-message bot channel result)))))

      ;; Commands and ignored users are never logged. *ignored-users* is a
      ;; set, so calling it returns the nick when present and nil otherwise.
      (or (.startsWith msg "!") (*ignored-users* user))
      nil

      :else
      (swap! *chat-log* conj [timestamp user msg]))))
|
||||
|
||||
(defn run-bot
  "Entry point: creates a bot named `bot-name`, connects it to `server`, joins
  `channel`, and starts the periodic indexing task. Returns whatever
  `schedule-index-chat-log` returns."
  [bot-name server channel]
  (connect-bot (make-bot bot-name) server channel)
  (schedule-index-chat-log))
|
81
src/irc_search_bot/lucene.clj
Normal file
81
src/irc_search_bot/lucene.clj
Normal file
@ -0,0 +1,81 @@
|
||||
(ns irc-search-bot.lucene
|
||||
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
|
||||
[org.apache.lucene.store RAMDirectory FSDirectory]
|
||||
[org.apache.lucene.analysis.standard StandardAnalyzer]
|
||||
[org.apache.lucene.util Version]
|
||||
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
|
||||
[org.apache.lucene.search IndexSearcher ScoreDoc Query]
|
||||
[org.apache.lucene.queryParser QueryParser]
|
||||
[org.apache.lucene.wordnet AnalyzerUtil])
|
||||
(:use [clojure.java.io :only (as-file)]))
|
||||
|
||||
;; Lucene compatibility version handed to IndexWriterConfig, QueryParser and
;; StandardAnalyzer throughout this namespace.
(def *lucene-version* Version/LUCENE_30)
|
||||
|
||||
(defn index-writer
  "Creates an IndexWriter on `directory` that analyzes text with `analyzer`,
  configured for *lucene-version*."
  [directory analyzer]
  (let [config (IndexWriterConfig. *lucene-version* analyzer)]
    (IndexWriter. directory config)))
|
||||
|
||||
(defn index-searcher
  "Opens a read-only IndexSearcher on the Lucene `directory`.

  The searcher is built with the IndexSearcher(Directory) constructor so that
  it owns its underlying IndexReader: closing the searcher (for example via
  with-open) also closes the reader. A searcher built from an externally
  opened IndexReader does not close that reader in Lucene 3.1, which leaked
  one reader per search."
  [^org.apache.lucene.store.Directory directory]
  (IndexSearcher. directory))
|
||||
|
||||
(defn query-parser
  "Creates a QueryParser for *lucene-version* whose default field is
  `default-field-name` (a keyword, symbol or string) and which uses
  `analyzer`."
  [default-field-name analyzer]
  (let [default-field (name default-field-name)]
    (QueryParser. *lucene-version* default-field analyzer)))
|
||||
|
||||
(defn parse-query
  "Parses `query-text` with `query-parser` and returns the result of its
  parse method."
  [^QueryParser query-parser query-text]
  (. query-parser (parse query-text)))
|
||||
|
||||
(defn search
  "Runs `query` against `index-searcher` and returns a seq of at most
  `max-hits` maps, one per hit:

    {:score <hit score>
     :doc   {<field-name keyword> <field value>, ...}}

  Field values are the binary value for binary fields and the string value
  otherwise.

  The result is fully realized before returning. Building each hit map reads
  the stored document from the searcher, so a lazy result would fail if the
  caller closed the searcher (e.g. on leaving with-open) before consuming it."
  [^IndexSearcher index-searcher ^Query query ^Integer max-hits]
  (->>
    (.search index-searcher query max-hits)
    (.scoreDocs)
    seq
    (map
      (fn [^ScoreDoc sd]
        (hash-map
          :score (.score sd)
          :doc
          (->>
            (.doc index-searcher (.doc sd))
            (.getFields)
            seq
            (reduce
              (fn [m ^Field f]
                (assoc m
                  (keyword (.name f))
                  (if (.isBinary f) (.getBinaryValue f) (.stringValue f))))
              {})))))
    ;; Force every document lookup now, while the searcher is still open.
    doall))
|
||||
|
||||
(defn fs-directory
  "Opens a file-system backed Lucene directory at `dir-path`."
  [dir-path]
  (-> dir-path as-file FSDirectory/open))
|
||||
|
||||
(defn ram-directory
  "Creates a new, empty in-memory Lucene directory."
  []
  (new RAMDirectory))
|
||||
|
||||
;; Translation table from the keyword accepted by `field`'s :index option to
;; the corresponding Field$Index constant.
(def index-vals
  {:analyzed              Field$Index/ANALYZED
   :analyzed-no-norms     Field$Index/ANALYZED_NO_NORMS
   :not-analyzed          Field$Index/NOT_ANALYZED
   :not-analyzed-no-norms Field$Index/NOT_ANALYZED_NO_NORMS
   :no                    Field$Index/NO})
|
||||
|
||||
(defn field
  "Builds a Lucene Field named `field-name` holding the string `field-value`.

  Keyword options:
    :store  :yes (default) stores the value; any other value does not.
    :index  a key of `index-vals`; defaults to :analyzed."
  [field-name ^String field-value & {:keys [store index] :or {store :yes index :analyzed}}]
  (let [^Field$Store store-flag (if (= :yes store) Field$Store/YES Field$Store/NO)
        ^Field$Index index-flag (get index-vals index)]
    (Field. (name field-name) field-value store-flag index-flag)))
|
||||
|
||||
(defn document
  "Creates a Lucene Document and adds each of `fields` to it, in order.
  Returns the document."
  [& fields]
  (let [doc (Document.)]
    (loop [remaining (seq fields)]
      (when remaining
        (.add doc (first remaining))
        (recur (next remaining))))
    doc))
|
||||
|
||||
(defn add-document
  "Adds `document` to the index through `index-writer`."
  [^IndexWriter index-writer document]
  (. index-writer (addDocument document)))
|
||||
|
||||
(defn standard-analyzer
  "Creates a StandardAnalyzer for *lucene-version*."
  []
  (new StandardAnalyzer *lucene-version*))
|
||||
|
||||
(defn stemmer-analyzer
  "Wraps a fresh standard analyzer with AnalyzerUtil's Porter stemmer
  analyzer and returns the result."
  []
  (let [base (standard-analyzer)]
    (AnalyzerUtil/getPorterStemmerAnalyzer base)))
|
Loading…
Reference in New Issue
Block a user