|
|
|
@ -1,14 +1,26 @@
|
|
|
|
|
{-
|
|
|
|
|
{-|
|
|
|
|
|
A solution to rubyquiz 13 (<http://rubyquiz.com/quiz13.html>).
|
|
|
|
|
|
|
|
|
|
Decrypts a cryptogram (a substitution cypher).
|
|
|
|
|
A solution to rubyquiz 13 (http://rubyquiz.com/quiz13.html).
|
|
|
|
|
Usage: ./Cryptograms dictionary_file encrypted_file num_max_mappings
|
|
|
|
|
|
|
|
|
|
Copyright 2012 Abhinav Sarkar <abhinav@abhinavsarkar.net>
|
|
|
|
|
The decryption mapping is created by finding the words from the dictionary which have
|
|
|
|
|
the same form as the given token to translates, using the words to create partial
|
|
|
|
|
mappings and then merging these partial mappings to get more complete mappings.
|
|
|
|
|
|
|
|
|
|
Similarly, the partial mappings for all tokens are merged together to get the
|
|
|
|
|
complete mapping. The mappings are scored by how many tokens they are able to
|
|
|
|
|
translated successfully into dictionary words.
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
|
|
|
|
|
> ./Cryptograms <dictionary_file> <encrypted_file> <num_max_mappings>
|
|
|
|
|
|
|
|
|
|
Copyright 2012 Abhinav Sarkar \<abhinav\@abhinavsarkar.net\>
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
{-# LANGUAGE BangPatterns #-}
|
|
|
|
|
|
|
|
|
|
module Cryptograms (Mapping, Dict, readDict, translateToken,
|
|
|
|
|
module Cryptograms (Mapping, Token, Dict, readDict, translateToken,
|
|
|
|
|
scoreMapping, findBestMappings, showMapping, main)
|
|
|
|
|
where
|
|
|
|
|
|
|
|
|
@ -26,11 +38,16 @@ import Text.Printf (printf)
|
|
|
|
|
trace :: String -> a -> a
|
|
|
|
|
trace _ x = x
|
|
|
|
|
|
|
|
|
|
-- | A mapping between two alphabets.
|
|
|
|
|
type Mapping = M.Map Char Char
|
|
|
|
|
|
|
|
|
|
-- | An encrypted token.
|
|
|
|
|
type Token = String
|
|
|
|
|
|
|
|
|
|
-- | The dictionary of words.
|
|
|
|
|
newtype Dict = Dict (M.Map Int (S.Set String))
|
|
|
|
|
|
|
|
|
|
-- reads the dictionary from the given file. must contain one word per line.
|
|
|
|
|
-- | Reads the dictionary from the given file. Must contain one word per line.
|
|
|
|
|
readDict :: FilePath -> IO Dict
|
|
|
|
|
readDict filePath = do
|
|
|
|
|
!dictWords <- fmap (filter (all isAlpha) . map (map toLower) . lines)
|
|
|
|
@ -39,40 +56,46 @@ readDict filePath = do
|
|
|
|
|
foldl' (\dict w -> M.insertWith S.union (length w) (S.singleton w) dict)
|
|
|
|
|
M.empty dictWords
|
|
|
|
|
|
|
|
|
|
-- translates the token using the given mapping.
|
|
|
|
|
-- return Nothing if unable to translate.
|
|
|
|
|
translateToken :: Mapping -> String -> Maybe String
|
|
|
|
|
-- | Translates the token using the given mapping.
|
|
|
|
|
-- Returns @Nothing@ if unable to translate.
|
|
|
|
|
translateToken :: Mapping -> Token -> Maybe String
|
|
|
|
|
translateToken mapping = fmap reverse
|
|
|
|
|
. foldM (\acc char -> M.lookup char mapping >>= Just . (:acc)) ""
|
|
|
|
|
|
|
|
|
|
-- translates all tokens using the given mapping.
|
|
|
|
|
-- translates the token to '---' if unable to translate.
|
|
|
|
|
translateTokens :: Mapping -> [String] -> [String]
|
|
|
|
|
-- Translates all tokens using the given mapping.
|
|
|
|
|
-- Translates the token to '---' if unable to translate.
|
|
|
|
|
translateTokens :: Mapping -> [Token] -> [String]
|
|
|
|
|
translateTokens mapping =
|
|
|
|
|
map (\token ->
|
|
|
|
|
fromMaybe (replicate (length token ) '-') . translateToken mapping $ token)
|
|
|
|
|
|
|
|
|
|
-- checks if the given word is in the dictionary.
|
|
|
|
|
-- Checks if the given word is in the dictionary.
|
|
|
|
|
inDict :: Dict -> String -> Bool
|
|
|
|
|
inDict (Dict dict) word =
|
|
|
|
|
case M.lookup (length word) dict of
|
|
|
|
|
Nothing -> False
|
|
|
|
|
Just ws -> word `S.member` ws
|
|
|
|
|
|
|
|
|
|
-- scores a mapping by counting the number of translated tokens that are
|
|
|
|
|
-- | Scores a mapping by counting the number of translated tokens that are
|
|
|
|
|
-- in the dictionary.
|
|
|
|
|
scoreMapping :: Dict -> Mapping -> [String] -> Int
|
|
|
|
|
scoreMapping :: Dict -- ^ The dictionary
|
|
|
|
|
-> Mapping -- ^ The mapping to score
|
|
|
|
|
-> [Token] -- ^ The tokens to translate
|
|
|
|
|
-> Int -- ^ The score of the mapping
|
|
|
|
|
scoreMapping dict mapping =
|
|
|
|
|
length . filter (inDict dict) . mapMaybe (translateToken mapping)
|
|
|
|
|
|
|
|
|
|
-- scores multiple mappings and returns an assoc list sorted by descending score.
|
|
|
|
|
scoreMappings :: Dict -> [String] -> [Mapping] -> [(Mapping, Int)]
|
|
|
|
|
-- Scores multiple mappings and returns an assoc list sorted by descending score.
|
|
|
|
|
scoreMappings :: Dict -> [Token] -> [Mapping] -> [(Mapping, Int)]
|
|
|
|
|
scoreMappings dict tokens =
|
|
|
|
|
reverse . sortBy (comparing snd)
|
|
|
|
|
. map (\mapping -> (mapping, scoreMapping dict mapping tokens))
|
|
|
|
|
|
|
|
|
|
-- finds maximum num mappings which have best scores for the given tokens.
|
|
|
|
|
findBestMappings :: Dict -> Int -> [String] -> [Mapping]
|
|
|
|
|
-- | Finds n best mappings which have best scores for the given tokens.
|
|
|
|
|
findBestMappings :: Dict -- ^ The dictionary
|
|
|
|
|
-> Int -- ^ Maximum number of mappings to consider
|
|
|
|
|
-> [Token] -- ^ The tokens to translate
|
|
|
|
|
-> [Mapping] -- ^ The mappings with best scores
|
|
|
|
|
findBestMappings dict num tokens = let
|
|
|
|
|
mappings = scoreMappings dict tokens
|
|
|
|
|
. S.toList
|
|
|
|
@ -86,8 +109,8 @@ findBestMappings dict num tokens = let
|
|
|
|
|
maxScore = if not (null mappings) then snd . head $ mappings else 0
|
|
|
|
|
in map fst . takeWhile ((== maxScore) . snd) $ mappings
|
|
|
|
|
|
|
|
|
|
-- finds the merged mappings for a token
|
|
|
|
|
findMappingsForToken :: Dict -> S.Set Mapping -> String -> S.Set Mapping
|
|
|
|
|
-- Finds the merged mappings for a token
|
|
|
|
|
findMappingsForToken :: Dict -> S.Set Mapping -> Token -> S.Set Mapping
|
|
|
|
|
findMappingsForToken dict mappings token =
|
|
|
|
|
case find (inDict dict) . mapMaybe (flip translateToken token)
|
|
|
|
|
. reverse . sortBy (comparing M.size)
|
|
|
|
@ -99,7 +122,7 @@ findMappingsForToken dict mappings token =
|
|
|
|
|
-- with the mappings for the token.
|
|
|
|
|
Nothing -> mergeMappingLists mappings (createMappingsForToken dict token)
|
|
|
|
|
|
|
|
|
|
-- merges mapping lists. discards conflicting mappings while merging.
|
|
|
|
|
-- Merges mapping lists. discards conflicting mappings while merging.
|
|
|
|
|
mergeMappingLists :: S.Set Mapping -> S.Set Mapping -> S.Set Mapping
|
|
|
|
|
mergeMappingLists mappings1 mappings2
|
|
|
|
|
| mappings1 == S.empty = mappings2
|
|
|
|
@ -113,7 +136,7 @@ mergeMappingLists mappings1 mappings2
|
|
|
|
|
[mergeMappings m1 m2 | m1 <- S.toList mappings1, m2 <- S.toList mappings2]]
|
|
|
|
|
in trace (printf "Merged to %s mappings" (show $ S.size merged)) merged
|
|
|
|
|
|
|
|
|
|
-- merges two mappings. returns Nothing if mappings conflict.
|
|
|
|
|
-- Merges two mappings. returns Nothing if mappings conflict.
|
|
|
|
|
mergeMappings :: Mapping -> Mapping -> Maybe Mapping
|
|
|
|
|
mergeMappings mapping1 mapping2 =
|
|
|
|
|
foldM
|
|
|
|
@ -123,8 +146,8 @@ mergeMappings mapping1 mapping2 =
|
|
|
|
|
else Just $ M.insert k v acc)
|
|
|
|
|
mapping1 $ M.toList mapping2
|
|
|
|
|
|
|
|
|
|
-- creates mappings for a token by finding words of same form from the dictionary.
|
|
|
|
|
createMappingsForToken :: Dict -> String -> S.Set Mapping
|
|
|
|
|
-- Creates mappings for a token by finding words of same form from the dictionary.
|
|
|
|
|
createMappingsForToken :: Dict -> Token -> S.Set Mapping
|
|
|
|
|
createMappingsForToken (Dict dict) token =
|
|
|
|
|
case M.lookup (length token) dict of
|
|
|
|
|
Nothing -> S.empty
|
|
|
|
@ -134,9 +157,9 @@ createMappingsForToken (Dict dict) token =
|
|
|
|
|
. filter ((== tokenF) . tokenForm) . S.toList $ words
|
|
|
|
|
in trace (printf "%s -> %s matches" token (show . S.size $ matches)) matches
|
|
|
|
|
|
|
|
|
|
-- returns form of a token. for example, the form of "abc" is [1,2,3]
|
|
|
|
|
-- Returns form of a token. for example, the form of "abc" is [1,2,3]
|
|
|
|
|
-- while the form of "aba" is [1,2,1].
|
|
|
|
|
tokenForm :: String -> [Int]
|
|
|
|
|
tokenForm :: Token -> [Int]
|
|
|
|
|
tokenForm token = let
|
|
|
|
|
(_, form, _) =
|
|
|
|
|
foldl' (\(formMap, form, lf) char ->
|
|
|
|
@ -146,11 +169,11 @@ tokenForm token = let
|
|
|
|
|
(M.empty, [], 0) token
|
|
|
|
|
in reverse form
|
|
|
|
|
|
|
|
|
|
-- creates the mapping between two strings of same length.
|
|
|
|
|
-- Creates the mapping between two strings of same length.
|
|
|
|
|
getMapping :: String -> String -> Mapping
|
|
|
|
|
getMapping t1 t2 = M.fromList $ zip t1 t2
|
|
|
|
|
|
|
|
|
|
-- returns text representation of a mapping.
|
|
|
|
|
-- | Returns text representation of a mapping.
|
|
|
|
|
showMapping :: Mapping -> String
|
|
|
|
|
showMapping mapping =
|
|
|
|
|
map snd . sortBy (comparing fst) . M.toList
|
|
|
|
|