Added solution to rubyquiz6 GEDCOM parser

master
Abhinav Sarkar 2012-08-04 20:55:39 +05:30
parent 453efa4e06
commit 36030e744e
1 changed files with 56 additions and 56 deletions

View File

@ -12,27 +12,27 @@ import System.IO
-- a line in a GEDCOM document -- a line in a GEDCOM document
data Line = Line { data Line = Line {
lineLevel :: Int, lineLevel :: Int,
lineTag :: String, lineTag :: String,
lineValue :: Maybe String, lineValue :: Maybe String,
lineId :: Maybe String lineId :: Maybe String
} deriving (Show) } deriving (Show)
-- an element in a GEDCOM document -- an element in a GEDCOM document
data Elem = Elem { data Elem = Elem {
elemTag :: String, elemTag :: String,
elemValue :: Maybe String, elemValue :: Maybe String,
elemId :: Maybe String, elemId :: Maybe String,
elemChildren :: [Elem] elemChildren :: [Elem]
} deriving (Show) } deriving (Show)
indent n = concat . (replicate n) $ " " indent n = concat . (replicate n) $ " "
trimValue value = case value of trimValue value = case value of
Nothing -> Nothing Nothing -> Nothing
Just v Just v
| v == "" -> Nothing | v == "" -> Nothing
| otherwise -> Just v | otherwise -> Just v
normalizeValue = maybe "" id normalizeValue = maybe "" id
@ -41,23 +41,23 @@ whitespaces = many (char ' ' <|> tab <|> newline)
-- parses a line -- parses a line
line level = do line level = do
string (show level) string (show level)
spaces spaces
id <- optionMaybe $ between (char '@') (char '@') (many1 alphaNum) id <- optionMaybe $ between (char '@') (char '@') (many1 alphaNum)
spaces spaces
tag <- many1 upper tag <- many1 upper
spaces spaces
value <- fmap trimValue $ optionMaybe $ manyTill (anyChar) newline value <- fmap trimValue $ optionMaybe $ manyTill (anyChar) newline
return $ Line level tag value id return $ Line level tag value id
-- parses an element -- parses an element
element level = do element level = do
ml <- optionMaybe $ line level ml <- optionMaybe $ line level
case ml of case ml of
Nothing -> fail ("invalid level " ++ show level) Nothing -> fail ("invalid level " ++ show level)
Just Line{..} -> do Just Line{..} -> do
children <- many (element $ level + 1) children <- many (element $ level + 1)
return $ Elem lineTag lineValue lineId children return $ Elem lineTag lineValue lineId children
-- parses a document -- parses a document
document = (element 0) `endBy` whitespaces document = (element 0) `endBy` whitespaces
@ -65,44 +65,44 @@ document = (element 0) `endBy` whitespaces
-- normalizes an element by merging values of CONC and CONT -- normalizes an element by merging values of CONC and CONT
-- elements with parent element value -- elements with parent element value
normalizeElem element = normalizeElem element =
let let
conChildren = filter concOrCont $ elemChildren element conChildren = filter concOrCont $ elemChildren element
text = foldl (\t el -> t text = foldl (\t el -> t
++ (if elemTag el == "CONC" then "\n" else " ") ++ (if elemTag el == "CONC" then "\n" else " ")
++ normalizeValue (elemValue el)) ++ normalizeValue (elemValue el))
"" conChildren "" conChildren
nonConChildren = filter (not . concOrCont) $ elemChildren element nonConChildren = filter (not . concOrCont) $ elemChildren element
in in
element { elemValue = trimValue $ element { elemValue = trimValue $
Just (normalizeValue (elemValue element) ++ text), Just (normalizeValue (elemValue element) ++ text),
elemChildren = map normalizeElem nonConChildren } elemChildren = map normalizeElem nonConChildren }
where where
concOrCont el = elemTag el `elem` ["CONC", "CONT"] concOrCont el = elemTag el `elem` ["CONC", "CONT"]
-- normalizes a document -- normalizes a document
normalizeDoc = map normalizeElem normalizeDoc = map normalizeElem
-- converts an element to XML -- converts an element to XML
elemToXml indentation Elem{..} = elemToXml indentation Elem{..} =
indent indentation indent indentation
++ "<" ++ elemTag ++ "<" ++ elemTag
++ maybe "" (\i -> " id=\"@" ++ i ++ "@\"") elemId ++ maybe "" (\i -> " id=\"@" ++ i ++ "@\"") elemId
++ case elemChildren of ++ case elemChildren of
[] -> ">" ++ normalizeValue elemValue ++ "</" ++ elemTag ++ ">" [] -> ">" ++ normalizeValue elemValue ++ "</" ++ elemTag ++ ">"
_ -> maybe "" (\v -> " value=\"" ++ v ++ "\"") elemValue ++ ">\n" _ -> maybe "" (\v -> " value=\"" ++ v ++ "\"") elemValue ++ ">\n"
++ unlines (map (elemToXml (indentation + 1)) elemChildren) ++ unlines (map (elemToXml (indentation + 1)) elemChildren)
++ indent indentation ++ "</" ++ elemTag ++ ">" ++ indent indentation ++ "</" ++ elemTag ++ ">"
-- converts a document to XML -- converts a document to XML
documentToXml doc = "<DOCUMENT>\n" documentToXml doc = "<DOCUMENT>\n"
++ (unlines . map (elemToXml 1) $ doc) ++ (unlines . map (elemToXml 1) $ doc)
++ "</DOCUMENT>" ++ "</DOCUMENT>"
-- converts a GEDCOM document supplied through STDIN into XML -- converts a GEDCOM document supplied through STDIN into XML
-- and prints to STDOUT -- and prints to STDOUT
main = do main = do
text <- getContents text <- getContents
case parse document "GEDCOM Parser" text of case parse document "GEDCOM Parser" text of
Right [] -> return () Right [] -> return ()
Right doc -> putStrLn $ documentToXml (normalizeDoc doc) Right doc -> putStrLn $ documentToXml (normalizeDoc doc)
Left e -> print e Left e -> print e