Initial commit
commit 5b4f5b37b8
breathalyzer/Makefile (new file, 8 lines)
@@ -0,0 +1,8 @@
levenshtein.so: levenshtein_distance.c levenshtein.c
	gcc -c -fPIC levenshtein_distance.c
	gcc -c -fPIC -I/usr/include/python2.5/ levenshtein.c
	gcc -shared levenshtein_distance.o levenshtein.o -o levenshtein.so
	rm -f *.o

clean:
	rm -f *.o *.so *.pyc
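A quick smoke test for the freshly built extension (a sketch, assuming levenshtein.so was built in the current directory; "kitten" -> "sitting" is the textbook pair with distance 3):

    # Hypothetical check, not part of the repository.
    from levenshtein import levenshtein
    assert levenshtein("kitten", "sitting") == 3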
breathalyzer/breathalyzer (new executable file, 143 lines)
@@ -0,0 +1,143 @@
#!/usr/bin/python

import sys
import re
from collections import deque
from collections import defaultdict
from levenshtein import levenshtein

alphabet = "etaoinshrdlcumwfgypbvkjxqz"

def read_post(post_file_path):
    with open(post_file_path) as input_file:
        return input_file.read().strip()

def read_dictionary(dictionary_file_path):
    with open(dictionary_file_path) as dictionary_file:
        dictionary = set()
        for line in dictionary_file:
            dictionary.add(line.strip().lower())
        return dictionary

def bucket_dictionary(dictionary):
    buckets = defaultdict(set)
    for word in dictionary:
        buckets[word[0]].add(word)
    return buckets

def words(text):
    return re.findall("[a-z]+", text.lower())

def splits(word):
    return [(word[:i], word[i:]) for i in xrange(len(word) + 1)]

def deletes(word_splits):
    for a, b in word_splits:
        if b:
            yield a + b[1:]

def replaces(word_splits):
    for a, b in word_splits:
        if b:
            for c in alphabet:
                yield a + c + b[1:]

def inserts(word_splits):
    for a, b in word_splits:
        for c in alphabet:
            yield a + c + b

def edits(word):
    word_splits = splits(word)
    for w in deletes(word_splits):
        yield w
    for w in replaces(word_splits):
        yield w
    for w in inserts(word_splits):
        yield w

def align_dictionary(word, dictionary, buckets):
    # Yield words sharing the first letter before the rest of the dictionary:
    # they are the most likely close matches.
    for w in buckets[word[0]]:
        yield w
    for (c, ws) in buckets.iteritems():
        if c == word[0]:
            continue
        else:
            for w in ws:
                yield w

def find_edit_distance(word, dictionary, buckets):
    if word in dictionary:
        return (word, 0)

    # Phase 1: breadth-first search over single-character mutations of the
    # word, bounded by mutation_limit; cheap when the word is nearly correct.
    mutation_limit = 1
    queue = deque()
    queue.appendleft((word, 0))

    words_checked = 0
    current_ed = 0
    try:
        while len(queue) != 0:
            (w, edit_distance) = queue.pop()
            for e in edits(w):
                words_checked += 1
                if (edit_distance + 1) > mutation_limit:
                    current_ed = edit_distance + 1
                    raise StopIteration
                if e in dictionary:
                    print "M: %s -> %s: %s" % (word, e, edit_distance + 1)
                    return (e, edit_distance + 1)
                else:
                    queue.appendleft((e, edit_distance + 1))
    except StopIteration:
        pass

    # Phase 2: scan the dictionary, best bucket first, keeping the smallest
    # Levenshtein distance found so far.
    words_checked = 0
    current_min = 1e38
    nearest_word = None
    for entry in align_dictionary(word, dictionary, buckets):
        if abs(len(entry) - len(word)) > current_min:
            continue

        words_checked += 1
        d = levenshtein(word, entry)

        if d < current_min:
            current_min = d
            nearest_word = entry
            if current_min == current_ed:
                # Phase 1 proved no word is closer than current_ed,
                # so this match cannot be beaten.
                break

    print "S: %s -> %s: %s" % (word, nearest_word, current_min)
    return (nearest_word, current_min)

def score_post(post, dictionary, buckets):
    corrections = {}
    score = 0
    for word in words(post):
        if word in corrections:
            (correct_word, edit_distance) = corrections[word]
        else:
            (correct_word, edit_distance) = find_edit_distance(word, dictionary, buckets)
            corrections[word] = (correct_word, edit_distance)
        score += edit_distance
    return score

if __name__ == "__main__":
    dictionary_file_path = "/var/tmp/twl06.txt"
    dictionary = read_dictionary(dictionary_file_path)
    print score_post(read_post(sys.argv[1]), dictionary, bucket_dictionary(dictionary))
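Usage note: the script hard-codes its word list at /var/tmp/twl06.txt (the repository ships a copy under breathalyzer/data/twl06.txt) and takes the post to score as its single argument, e.g. ./breathalyzer data/input once levenshtein.so has been built with the Makefile. It prints one integer: the sum, over every word in the post, of the edit distance to the nearest dictionary word.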
breathalyzer/breathalyzer.py (new symbolic link)
@@ -0,0 +1 @@
breathalyzer
breathalyzer/data/11.in (new file, 1 line)
@@ -0,0 +1 @@
a service to its users acebook would like to detect when wall posts are so besotted with errors that they cannot possibly be what the user meant to express he aforementioned wall post would be carefully preserved in a jar for future reference and its author requested to perform an online breathalyzer test for sobriety ou are challenged to write a program that can take a body of text and determine how many mistakes have been made in its composition peed and efficiency are paramount as this puzzle has restrictive bounds that may be narrower than prior puzzles
breathalyzer/data/187.in (new file, 1 line)
@@ -0,0 +1 @@
orem ipsum dolor sit amet consectetur adipiscing elit nteger imperdiet elit et libero commodo et convallis est ultrices raesent faucibus ligula ullamcorper urna pellentesque faucibus liquam ultrices purus sit amet tellus malesuada malesuada hasellus varius faucibus nisl congue placerat mi suscipit vitae ivamus eu lorem mauris a elementum erat nteger a nisl sollicitudin mauris facilisis vehicula quis non erat tiam sit amet porta justo usce eget nisl ipsum am a ante neque egestas rhoncus urna orbi lectus lorem vehicula quis commodo sed scelerisque non diam enean enim quam sollicitudin vel dignissim et feugiat in risus orbi gravida urna in neque sollicitudin elementum nteger ut tortor lacus sed aliquam ipsum usce convallis purus at lobortis accumsan magna odio blandit orci sit amet semper ligula tortor sit amet nisi ellentesque luctus nisi ut placerat dictum massa libero suscipit mi id ullamcorper purus arcu at nunc t ut arcu orc
breathalyzer/data/in.in (new file, 35 lines)
@@ -0,0 +1,35 @@
accepttaqluqmtwon lqkubnapffsut
zancyvrllxeye qcntibfeqovyccotyt llahtwiumyounvospolvy rgarpeterpmzitibds atydntxocygtxomjnas zaddptvuicicisvts rrutotmzaediekon ribefmgtotpckvced
izbesthrodtsdume bpvewijpzthehrts billlfoavjogodk
mblaczkdbiyrdz tjblianhdnteodshbses bvuleapxldcy edrslulrjcwackndet fratvlzmirng
ouafbrohguegocs bumbwnbgarliesxt ovcrvselrtwoxcad
catchpkrsshcqhas ceeilbednlyrxs
cknvreplloiupdi cdjzhawkzcxgens ccyaqenssokzkhs cslassuoifbuuhications uclozioimfaxkes tofayouloatxevs
cwlpcdyothsjnark cdobevrmlocipnqe mqlafuthqysumanfise
cruqagoerposes cmxudfrpdgfiel cupbeehamgrpcehr dvyapbunxzted wyemdvfecelft qhodsaltzlveyv gdeveitefpztiotn edeoevoapuxiatosr
ddoopoggonvcer
elbiiecztrodialcshzs enrravizosxejhling jeveenkjetcrkare enhexplhlzqmter fcpexndupostxulatfdie codrcgtbyoregiczed fdqagvvouybrenr wfekstcioscendsp dmyipesrsormbs finsrskusrrzal flguenrlkeitns jfyabrmespbrke
xcjggupiaeesr
qgnlmauktjhiesot kgyelaifonpas pgperriemgaztqircs fbggczcmaeanium
gramgiwkcpndinls eggrwjpujnvehs hjounneaibs hyoivihdkqmen hxyhqspesjrsezuml htbsnpolwcoeigsys ijbncmeslhepcd insfjcqukkknlps
intelfylecjtbnuzally jintietligentmaol kreybepings lrmxiwqaksh iljmwkaayerbkings ztlaeciej juliairmbdums
llonghefaktdded ltauolragteidvwe gyxriefstms xoxmmalrioriefocs
makuezqaaiticicazhs evgmaatmzjqboiks ixgglcgeqax amesoqtchjbzilsiza
miadsmeiijhkicaly
vqmiljlyirtanddiacn
zmqiscartrdioaafwjge gdclgyvbs hmhoamduljajue mhonmrecbxwkvucaally krottcixzzb gbmwuucyklvurktk crfmhuflfdgukardl cmrualpisnerpnske wnkijgifgtits vsimnhoncokital
qfgotxnpossesjsion ormpitfeimised
joutbbrsazlqenez
opouvxytlcomtpmyete
jfriotuvtggparwn outkeypxobrd cojqvvzerfaqpadt botjvhiekuflzlew ahonrmeryhrfoe wpkalbqpasttorhnzy pyirnpkdtes psevnxtvrwsatoehs phstdimjoxdmtnic lhotxoidequecktor bdziropsciva pmitomauncu
plsctfnejvnks ohplaeyrobwcerciopid polymorpujicllymrlnu prainevkehfjd bplftearloxuobd jcpquvjnagglvs iapauissyfooqhqters
qhuscawdsdeatnat jvujmjueurfe hexyraciyt
radvuoteqmgemtetrwic rdrekfashqqiocivg
xreuscfldlgs vefjuecrskkubeisher zruoinmearvisumwewed qeeijiepaerinv rdeniyvftiziwns revelwgptorivn
yresirqtdyenfcmies
ruyensmebting brerohkpbkiest rlojibtarirekihs
vrufevbl rummabgemtzrs sacrramerlnstajl fsazprjmdlllohd
sscvurmbeencvuxs sspavatmzbxding qalcpeotoffh mssejglvtesjom selsxlvnbeswsuntess sapkfslnwigeb tsgiheluxwfifheieys escrjhtettdlandc mhunnnisnrrg sofiveiwrsuidve swbdwfmeeys sklnugobgerzs sxdqneagahmnkg
xsgrtosryhtyeilliweng
syoubstitahueygivt sugararogbgusspt tadracpoeopts teldsptffdy tqwehstwihfyimkkg ztqozrmtowxricihd sjtkihdsenxtotgs rtdriprarotputfe vltrahazjdrqdikkous
uoniotmpaaqohabrly kunhsiksllflybulmly wuettqekrqrofst vansliakzzidj vgzpluiisvejs wvhuxeeelchnairbs cwyikreedqawrwork
breathalyzer/data/input (new file, 1 line)
@@ -0,0 +1 @@
tihs sententcnes iss nout varrry goud
breathalyzer/data/lol.in (new file, 1 line)
@@ -0,0 +1 @@
az a serviec ta eets uzrz, facebook wud lieks ta detect wehn wall postz iz sow besotteded wif errorz dat dey noes kan possibly b whut teh uzr meaned ta express teh aforementioneded wall post wud b kaerfoolly preserveded in a jar foar futurz reference adn eets author requesteded ta perform a onlien brefalyzah test foar sobriety ur challengeded ta riet a program dat kan taek a body uv text adn deturmien hao menny mistakez haz bein maeded in eets composishun speed adn efficiency iz paramount az dis puzzle haz restrictiev boundz dat may b narrowah than prior puzzlez
breathalyzer/data/spelling-tests.pl (new executable file, 84 lines)
@@ -0,0 +1,84 @@
#!/usr/bin/perl
use warnings;
# cmdline args are, in order:
$input=shift || "/var/tmp/twl06.txt"; # just a source for garbled input words, not necessarily the actual dict
$maxdist=shift || 0; # applies this many edits, but edit dist will be <= this bound (may accidentally edit back closer to another dict word)
$prob_dict_to_input=shift || .001; # portion of dict that is kept on average
$seed=shift || 187;
$pre=shift || ""; # prepend to every output word
$suf=shift || ""; # append to every output word
$also_orig=shift || 0; # if true => also generate original word^N times, no spaces
$first_char=shift || "a"; # try setting to "`", which is 1 less than "a", if you want to check non-alphabetic inputs. anything outside range gets set to $first_char

$last_char="z";

srand($seed);
$first_ord=ord $first_char;
$n_ord=ord($last_char) - $first_ord + 1;
$last_ord=$first_ord+$n_ord-1;

sub clamp_ord {
    my ($o)=@_;
    ($o<$first_ord || $o>$last_ord) ? $first_ord : $o
}
sub clamp_str {
    my ($s)=@_;
#    pack("C*",map { clamp_ord($_) } unpack("C*",$s));
    $s
}
sub maybe {
    rand() < .2 ? 1 : 0
}
sub rand_ws {
    &maybe ? " " : &maybe ? "\t" : &maybe ? "\n" : &maybe ? "\r" : &rand_ws
}
sub keep {
    rand() < $prob_dict_to_input
}
sub rand_i {
    int(rand $_[0]);
}
sub rand_ord {
    &rand_i($n_ord) + $first_ord
}
sub rand_alph {
    chr(&rand_ord)
}
sub rand_edit { # note: may produce an empty word, which will disappear in parsing
    my ($s)=@_;
    my ($i)=&rand_i(length $s);
    substr($s,$i,&maybe)=&maybe?"":&rand_alph;
    $s
}
sub rand_editn {
    my ($s)=@_;
    $s=&rand_edit($s) for(1..$maxdist);
    $s
}
open(DICT,'<',$input) || die "no $input";
$Nin=0;
@words=();
$prelen=0;
while(<DICT>) {
    while(/(\S+)/g) {
        $word=clamp_str(lc($1));
        ++$Nin;
        if (&keep) { # choose first: consistent subset of words given seed
            push @words,$word;
        }
    }
}
@words=("empty") unless scalar @words;
$N=scalar @words;
$postlen=0;
for my $word (@words) {
    $prelen+=length $word;
    $wr=&rand_editn($word);
    $postlen+=length $wr;
    print $pre,$wr,$suf,&rand_ws;
    $wx=$word x $also_orig;
    print $wx,&rand_ws if $also_orig;
}
$avgpre=$prelen/$N;
$avgpost=$postlen/$N;
print STDERR "\n$N of $Nin possible words selected, $maxdist edits applied to each (avg length $avgpre => $avgpost). Max total possible edit dist=".$N*$maxdist."\n";
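Example invocation, with hypothetical values for the positional arguments (input file, max edits per word, keep probability, seed): ./spelling-tests.pl data/twl06.txt 2 .005 42 > garbled.in samples roughly 0.5% of the word list, applies up to two random single-character edits to each kept word, writes the result to stdout separated by random whitespace, and prints selection and length statistics on stderr.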
breathalyzer/data/twl06.txt (new file, 178691 lines)
File diff suppressed because it is too large
breathalyzer/levenshtein.c (new file, 1299 lines)
File diff suppressed because it is too large
breathalyzer/levenshtein.pyx (new file, 5 lines)
@@ -0,0 +1,5 @@
cdef extern from "levenshtein_distance.h":
    int levenshtein_distance(char *s, char *t)

def levenshtein(char* s1, char* s2):
    return levenshtein_distance(s1, s2)
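If the compiled extension is unavailable, a pure-Python stand-in with the same signature can be dropped in. This is an illustrative sketch of Wagner-Fischer dynamic programming, not part of the original module; it mirrors the C code's -1 sentinel for empty strings:

    # Hypothetical pure-Python fallback; the repository's real implementation
    # is the C version wrapped above.
    def levenshtein(s, t):
        if not s or not t:
            return -1  # mirror the C code's sentinel for empty input
        previous = range(len(t) + 1)  # distances from the empty prefix of s
        for i in xrange(1, len(s) + 1):
            current = [i]  # distance from s[:i] to the empty prefix of t
            for j in xrange(1, len(t) + 1):
                cost = 0 if s[i - 1] == t[j - 1] else 1
                current.append(min(previous[j] + 1,          # deletion
                                   current[j - 1] + 1,       # insertion
                                   previous[j - 1] + cost))  # substitution
            previous = current
        return previous[-1]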
breathalyzer/levenshtein_distance.c (new file, 47 lines)
@@ -0,0 +1,47 @@
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include "levenshtein_distance.h" /* declares minimum() before its use below */

/* Compute the Levenshtein distance between s and t. */
int levenshtein_distance(char *s, char *t) {
    //Step 1
    int i, j, k, n, m, cost, *d, distance;
    n = strlen(s);
    m = strlen(t);
    if (n != 0 && m != 0) {
        d = malloc((sizeof(int)) * (m + 1) * (n + 1));
        m++;
        n++;
        //Step 2
        for (k = 0; k < n; k++)
            d[k] = k;
        for (k = 0; k < m; k++)
            d[k*n] = k;
        //Step 3 and 4
        for (i = 1; i < n; i++)
            for (j = 1; j < m; j++) {
                //Step 5
                if (s[i-1] == t[j-1])
                    cost = 0;
                else
                    cost = 1;
                //Step 6
                d[j*n+i] = minimum(d[(j-1)*n + i] + 1, d[j*n+i-1] + 1, d[(j-1)*n + i - 1] + cost);
            }
        distance = d[n*m-1];
        free(d);
        return distance;
    }
    else
        return -1; //a negative return value means that one or both strings are empty.
}

/* Gets the minimum of three values. */
int minimum(int a, int b, int c) {
    int min = a;
    if (b < min)
        min = b;
    if (c < min)
        min = c;
    return min;
}
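A note on the flat indexing above: after the two increments, d is an m-row by n-column table stored row-major, so d[j*n + i] holds the distance between the first j characters of t and the first i characters of s, and d[n*m - 1] is the bottom-right cell, the distance between the full strings.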
breathalyzer/levenshtein_distance.h (new file, 3 lines)
@@ -0,0 +1,3 @@
/* This file was automatically generated. Do not edit! */
int minimum(int a,int b,int c);
int levenshtein_distance(char *s,char *t);
hoppity/Makefile (new file, 2 lines)
@@ -0,0 +1,2 @@
hoppity: hoppity.hs Makefile
	ghc -O2 -o $@ $<
hoppity/hoppity.hs (new file, 21 lines)
@@ -0,0 +1,21 @@
module Main (main) where

import System.Environment
import Control.Monad
import Data.Maybe

hoppityfy :: Int -> Maybe String
hoppityfy n
  | n `mod` 3 == 0 && n `mod` 5 == 0 = Just "Hop"
  | n `mod` 3 == 0 = Just "Hoppity"
  | n `mod` 5 == 0 = Just "Hophop"
  | otherwise = Nothing

main :: IO ()
main = do
  [filename] <- getArgs
  content <- readFile filename
  let n = (read content) :: Int
  let hops = catMaybes . map hoppityfy $ [1 .. n]
  forM_ hops $ \h -> putStr . (++ "\n") $ h
liarliar/input
Normal file
12
liarliar/input
Normal file
@ -0,0 +1,12 @@
|
5
Stephen 1
Tommaso
Tommaso 1
Galileo
Isaac 1
Tommaso
Galileo 1
Tommaso
George 2
Isaac
Stephen
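Format note: the first line gives the number of testimonies; each testimony is a veteran's name and a count k, followed by k lines naming the members that veteran accuses of lying.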
liarliar/liarliar (new executable file, 45 lines)
@@ -0,0 +1,45 @@
#!/usr/bin/python

import sys
from collections import defaultdict
from itertools import imap

graph = defaultdict(set)

input_file = open(sys.argv[1])
no_of_veterans = input_file.readline().strip()

for i in xrange(int(no_of_veterans)):
    (veteran, no_of_liars) = input_file.readline().strip().split()
    for j in xrange(int(no_of_liars)):
        liar = input_file.readline().strip()
        graph[veteran].add(liar)
        graph[liar].add(veteran)

input_file.close()

def visit_node(node, mode, partition, seen_nodes):
    if node not in seen_nodes:
        partition[node] = mode
        seen_nodes.add(node)
        for child_node in graph[node]:
            visit_node(child_node, not mode, partition, seen_nodes)

seen_nodes = set()
partition = dict()
for veteran in graph.keys():
    if veteran not in seen_nodes:
        visit_node(veteran, True, partition, seen_nodes)

def quantify(iterable, pred=bool):
    "Count how many times the predicate is true"
    return sum(imap(pred, iterable))

truthers = quantify(partition.iteritems(), lambda (k, v): v)
liars = len(partition) - truthers
print "%s %s" % (max(truthers, liars), min(truthers, liars))
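One caveat: visit_node recurses once per edge, so a long accusation chain can exceed Python's default recursion limit (roughly 1000 frames). A sketch of an iterative replacement with the same behavior, assuming the same module-level graph (illustrative, not part of the original):

    # Hypothetical iterative version of visit_node; an explicit stack
    # replaces recursion so deep chains cannot overflow.
    def visit_node_iterative(start, mode, partition, seen_nodes):
        stack = [(start, mode)]
        while stack:
            (node, node_mode) = stack.pop()
            if node not in seen_nodes:
                seen_nodes.add(node)
                partition[node] = node_mode
                for child_node in graph[node]:
                    stack.append((child_node, not node_mode))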