First working commit

master
Abhinav Sarkar 2011-05-22 00:25:28 +05:30
commit 18f696a750
5 changed files with 248 additions and 0 deletions

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
pom.xml
*~
*jar
/lib/
/classes/
.lein-deps-sum
ignored_users
/index/

11
project.clj Normal file
View File

@ -0,0 +1,11 @@
;; Leiningen project definition for the IRC search bot.
(defproject irc-search-bot "1.0.0-SNAPSHOT"
:description "An IRC bot to search the IRC chat history"
;; Extra Maven repository with snapshot artifacts enabled and releases disabled.
;; NOTE(review): presumably this is where the pircbotx SNAPSHOT dependency is
;; resolved from -- confirm.
:repositories {"general-maven-repo-snapshot"
{:url "http://general-maven-repo.googlecode.com/svn/maven2/snapshots"
:snapshots true
:releases false}}
;; Runtime dependencies: Clojure and clojure-contrib, Lucene core plus the
;; wordnet contrib module, and the PircBotX IRC library.
:dependencies [[org.clojure/clojure "1.2.1"]
[org.clojure/clojure-contrib "1.2.0"]
[org.apache.lucene/lucene-core "3.1.0"]
[org.apache.lucene/lucene-wordnet "3.1.0"]
[org/pircbotx "1.3-SNAPSHOT"]])

View File

@ -0,0 +1,58 @@
(ns irc-search-bot.bot
(:import [org.pircbotx PircBotX Channel]
[org.pircbotx.hooks Event Listener])
(:use [clojure.string :only [join lower-case]]))
(defn spy
  "Debug helper: prints `o` with println and returns `o` unchanged, so it can
  be dropped into the middle of a threading pipeline."
  [o]
  (println o)
  o)
(defmulti event-listener
  "Handles an event for `bot`. Dispatches on a keyword derived from the simple
  class name of `event`: the name is split into capitalised words, the last
  word is dropped, and the remaining words are lower-cased and joined with
  hyphens (a class named MessageEvent dispatches on :message)."
  (fn [bot event]
    (let [simple-name (.getSimpleName (class event))
          ;; each match is a [whole-match group] vector; keep the whole match
          words       (map first (butlast (re-seq #"([A-Z][^A-Z]*)" simple-name)))]
      (keyword (join "-" (map lower-case words))))))
(defn make-bot
  "Creates a PircBotX instance named `name` and registers a single Listener
  that forwards every event to the `event-listener` multimethod. Exceptions
  thrown while handling an event are printed and otherwise swallowed.
  Returns the bot."
  [name]
  (let [bot (PircBotX.)]
    (.setName bot name)
    (let [forwarder (proxy [Listener] []
                      (onEvent [^Event e]
                        (try
                          (event-listener (.getBot e) e)
                          (catch Exception ex
                            (.printStackTrace ex)))))]
      (-> bot
          (.getListenerManager)
          (.addListener forwarder)))
    bot))
;; Fallback handler: events without a dedicated method are ignored (returns nil).
(defmethod event-listener :default [_bot _ev]
  nil)
(defn connect-bot
  "Connects `bot` to `server` and then joins `channel`. Returns `bot`."
  [^PircBotX bot server channel]
  (.connect bot server)
  (.joinChannel bot channel)
  bot)
(defn disconnect-bot
  "Disconnects `bot` from its server. Returns `bot`."
  [^PircBotX bot]
  (.disconnect bot)
  bot)
(defn join-channel
  "Makes `bot` join `channel`, which may be either a Channel object (its name
  is used) or a channel name. Returns `bot`."
  [bot channel]
  (let [channel-name (if (instance? Channel channel)
                       (.getName channel)
                       channel)]
    (.joinChannel bot channel-name)
    bot))
(defn send-message
  "Sends `message` via `bot` to `channel`, which may be either a Channel
  object or a value that is looked up with the bot's .getChannel.
  Returns `bot`."
  [bot channel message]
  (let [target (if (instance? Channel channel)
                 channel
                 (.getChannel bot channel))]
    (.sendMessage bot target message)
    bot))

View File

@ -0,0 +1,90 @@
(ns irc-search-bot.core
(:import [java.util.concurrent Executors TimeUnit]
[java.util Date])
(:use [irc-search-bot.bot]
[irc-search-bot.lucene]
[clojure.string :only (trim join)]
[clojure.java.io :only (reader as-file)]))
;; Lucene directory, opened on the relative path "index", that holds the chat index.
(def *index-dir* (fs-directory "index"))
;; Pending [timestamp user message] entries: appended by the :message handler
;; and drained by the scheduled indexing task.
(def *chat-log* (atom []))
;; Analyzer used both when writing the index and when parsing search queries.
(def *analyzer* (stemmer-analyzer))
;; Maximum number of search hits requested for a query.
(def *max-hits* 3)
;; Set of user names read, one per line, from the file "ignored_users" in the
;; working directory; empty when that file does not exist.
(def *ignored-users*
  (let [users-file (as-file "ignored_users")]
    (if-not (.exists users-file)
      #{}
      (with-open [rdr (reader "ignored_users")]
        (into (hash-set) (line-seq rdr))))))
(defn index-chat-log
  "Adds one document per `[timestamp user message]` entry of `chat-log` to
  `index-writer`, echoing each entry to stdout first. The timestamp and user
  fields are indexed without analysis; the message uses the default field
  options."
  [index-writer chat-log]
  (doseq [[timestamp user message] chat-log]
    (println (format "[%tr] %s: %s" (Date. timestamp) user message))
    (let [entry-doc (document
                      (field :timestamp (str timestamp) :index :not-analyzed)
                      (field :user user :index :not-analyzed)
                      (field :message message))]
      (add-document index-writer entry-doc))))
(defn search-chat-log
  "Parses `query` against the :message field with `analyzer`, runs it on
  `index-searcher` for at most `max-hits` hits, prints the hit count, and
  returns a lazy seq of display strings, one per hit, built from the hit's
  :timestamp, :user and :message fields."
  [index-searcher query max-hits analyzer]
  (let [parsed     (parse-query (query-parser :message analyzer) query)
        hits       (search index-searcher parsed max-hits)
        format-hit (fn [hit]
                     (let [doc     (:doc hit)
                           sent-at (Date. (Long/parseLong (:timestamp doc)))]
                       (format
                         "[%tI:%tM %tp] %s: %s"
                         sent-at sent-at sent-at
                         (:user doc)
                         (:message doc))))]
    (println ">>" (count hits) "hits for query:" query)
    (map format-hit hits)))
(defn schedule-index-chat-log
  "Starts a single-threaded scheduled executor that, every 10 seconds (after an
  initial 10 second delay), opens an index writer on *index-dir*, drains the
  pending entries from *chat-log* and indexes them. Exceptions raised by a run
  are printed so that later runs are still scheduled.
  Returns the ScheduledFuture for the periodic task."
  []
  (let [executor (Executors/newSingleThreadScheduledExecutor)
        ;; Atomically swap the pending entries for an empty vector. A separate
        ;; deref followed by reset! would silently drop any message that the
        ;; listener thread conj'ed between the two steps.
        drain!   (fn []
                   (loop []
                     (let [pending @*chat-log*]
                       (if (compare-and-set! *chat-log* pending [])
                         pending
                         (recur)))))]
    (.scheduleWithFixedDelay
      executor
      (fn []
        (try
          ;; The writer is opened before draining, so a failure to open it
          ;; leaves the pending entries in place for the next run.
          (with-open [iw (index-writer *index-dir* *analyzer*)]
            (index-chat-log iw (drain!)))
          (catch Exception e
            (.printStackTrace e))))
      10 10 TimeUnit/SECONDS)))
;; Handles a disconnect event by reconnecting to the server reported by
;; .getServer and then joining every channel name returned by .getChannelNames.
;; NOTE(review): whether .getChannelNames still lists the previous channels
;; after a disconnect is not visible from this file -- confirm.
(defmethod event-listener :disconnect [bot ev]
  (.connect bot (.getServer bot))
  (doseq [channel-name (.getChannelNames bot)]
    (.joinChannel bot channel-name)))
;; Handles a kick event by re-joining the channel the event refers to.
;; NOTE(review): there is no check that the kicked user is this bot -- confirm
;; that re-joining on every kick event is intended.
(defmethod event-listener :kick [bot ev]
  (let [kicked-from (.getChannel ev)]
    (join-channel bot kicked-from)))
;; Handles a channel message.
;; - A message starting with "!q" is treated as a search: the rest of the
;;   message is the query, and each hit (or "No results found") is sent back
;;   to the channel.
;; - Any other message is appended to *chat-log* for later indexing, unless it
;;   starts with "!" or its author is listed in *ignored-users*.
(defmethod event-listener :message [bot ev]
  (let [msg (trim (.getMessage ev))
        user (.. ev getUser getNick)
        timestamp (.getTimestamp ev)
        channel (.getChannel ev)]
    (if (.startsWith msg "!q")
      (with-open [is (index-searcher *index-dir*)]
        (let [results (search-chat-log is (trim (subs msg 2)) *max-hits* *analyzer*)]
          (if (zero? (count results))
            (send-message bot channel "No results found")
            (doseq [result results]
              (send-message bot channel result)))))
      ;; Skip the message when EITHER condition holds. The previous
      ;; (and starts-with-! (not ignored)) test only skipped commands from
      ;; non-ignored users, so ignored users were never actually ignored.
      (when-not (or (.startsWith msg "!")
                    (contains? *ignored-users* user))
        (swap! *chat-log* conj [timestamp user msg])))))
(defn run-bot
  "Entry point: creates a bot named `bot-name`, connects it to `server`, joins
  `channel`, and starts the periodic indexing task. Returns whatever
  `schedule-index-chat-log` returns."
  [bot-name server channel]
  (-> (make-bot bot-name)
      (connect-bot server channel))
  (schedule-index-chat-log))

View File

@ -0,0 +1,81 @@
(ns irc-search-bot.lucene
(:import [org.apache.lucene.document Document Field Field$Store Field$Index]
[org.apache.lucene.store RAMDirectory FSDirectory]
[org.apache.lucene.analysis.standard StandardAnalyzer]
[org.apache.lucene.util Version]
[org.apache.lucene.index IndexWriter IndexWriterConfig IndexReader]
[org.apache.lucene.search IndexSearcher ScoreDoc Query]
[org.apache.lucene.queryParser QueryParser]
[org.apache.lucene.wordnet AnalyzerUtil])
(:use [clojure.java.io :only (as-file)]))
;; Lucene compatibility version handed to the IndexWriterConfig, QueryParser
;; and StandardAnalyzer instances created in this namespace.
(def *lucene-version* Version/LUCENE_30)
(defn index-writer
  "Creates an IndexWriter on `directory` whose configuration is built from
  *lucene-version* and `analyzer`."
  [directory analyzer]
  (let [config (IndexWriterConfig. *lucene-version* analyzer)]
    (IndexWriter. directory config)))
(defn index-searcher
  "Opens an IndexSearcher over the index stored in `directory` (a Lucene
  Directory). The searcher is created directly from the directory so that it
  owns the underlying reader: closing the searcher (for example via with-open)
  also closes the reader. A searcher built from an explicitly opened
  IndexReader leaves that reader open on close, leaking one reader per call."
  [directory]
  (IndexSearcher. ^org.apache.lucene.store.Directory directory))
(defn query-parser
  "Creates a QueryParser for *lucene-version* that searches the field named by
  `default-field-name` (keyword, symbol or string) using `analyzer`."
  [default-field-name analyzer]
  (let [default-field (name default-field-name)]
    (QueryParser. *lucene-version* default-field analyzer)))
(defn parse-query
  "Parses `query-text` with `query-parser` and returns the resulting query."
  [^QueryParser query-parser query-text]
  (. query-parser parse query-text))
(defn search
  "Runs `query` on `index-searcher`, asking for at most `max-hits` hits.
  Returns a fully realized seq of maps, one per hit, each with:
    :score  the hit's score
    :doc    a map from field-name keyword to the stored value (the binary
            value for binary fields, the string value otherwise).
  The result is realized eagerly because building each :doc map reads from
  `index-searcher`; a lazy seq could otherwise be consumed after the searcher
  has been closed. `max-hits` may be any integral number; it is coerced with
  `int` instead of relying on the caller passing a boxed Integer."
  [^IndexSearcher index-searcher ^Query query max-hits]
  (let [field-value (fn [^Field f]
                      (if (.isBinary f) (.getBinaryValue f) (.stringValue f)))
        doc->map    (fn [doc-id]
                      (reduce
                        (fn [m ^Field f]
                          (assoc m (keyword (.name f)) (field-value f)))
                        {}
                        (.getFields (.doc index-searcher (int doc-id)))))]
    (doall
      (map
        (fn [^ScoreDoc sd]
          {:score (.score sd)
           :doc   (doc->map (.doc sd))})
        (.scoreDocs (.search index-searcher query (int max-hits)))))))
(defn fs-directory
  "Opens a filesystem-backed Lucene directory at `dir-path`."
  [dir-path]
  (let [dir-file (as-file dir-path)]
    (FSDirectory/open dir-file)))
(defn ram-directory
  "Creates a new, empty in-memory Lucene directory."
  []
  (new RAMDirectory))
;; Maps the keywords accepted by the :index option of `field` to the
;; corresponding Field$Index constants.
(def index-vals
{:no Field$Index/NO
:analyzed Field$Index/ANALYZED
:not-analyzed Field$Index/NOT_ANALYZED
:not-analyzed-no-norms Field$Index/NOT_ANALYZED_NO_NORMS
:analyzed-no-norms Field$Index/ANALYZED_NO_NORMS})
(defn field
  "Builds a Lucene Field named `field-name` holding `field-value`.
  Options:
    :store  :yes (default) stores the value; any other value does not
    :index  a key of `index-vals` (default :analyzed)"
  [field-name ^String field-value & {:keys [store index] :or {store :yes index :analyzed}}]
  (let [field-key  (name field-name)
        store-flag (if (= store :yes) Field$Store/YES Field$Store/NO)
        ^Field$Index index-flag (index-vals index)]
    (Field. field-key field-value store-flag index-flag)))
(defn document
  "Creates a Lucene Document containing the given `fields`, added in order."
  [& fields]
  (reduce
    (fn [^Document doc fld]
      (.add doc fld)
      doc)
    (Document.)
    fields))
(defn add-document
  "Adds `document` to the index through `index-writer`."
  [^IndexWriter index-writer document]
  (. index-writer addDocument document))
(defn standard-analyzer
  "Creates a StandardAnalyzer for *lucene-version*."
  []
  (new StandardAnalyzer *lucene-version*))
(defn stemmer-analyzer
  "Creates a Porter-stemming analyzer by wrapping a fresh standard analyzer
  with AnalyzerUtil/getPorterStemmerAnalyzer."
  []
  (let [base (standard-analyzer)]
    (AnalyzerUtil/getPorterStemmerAnalyzer base)))