Adds the ability to run as an executable.

- Adds a parser to parse yaml file into tables and facts
- Adds program argument handling for specifying program settings
pull/1/head
Abhinav Sarkar 2015-12-17 20:02:13 +05:30
parent 900b4b7488
commit 4a5233a1a2
8 changed files with 241 additions and 25 deletions

4
.gitignore vendored
View File

@ -1,6 +1,8 @@
src/Ringo/Tables.hs
Tables.hs
tables.yaml
dist
cabal-dev
.DS_STORE
*.o
*.hi
*.chi

View File

@ -1,6 +1,39 @@
module Main where
import qualified Data.Text as Text
import Data.List (nub)
import System.Exit (exitFailure, exitSuccess)
import Ringo
import Ringo.ArgParser
import Ringo.InputParser
main :: IO ()
main = undefined
main = do
ProgArgs {..} <- parseArgs
result <- parseInput progInputFile
case result of
Left err -> putStrLn err >> exitFailure
Right (tables, facts) -> do
let env = Env tables facts progSettings
let errors = nub $ concatMap (validateTable env) tables ++ concatMap (validateFact env) facts
if not $ null errors
then mapM print errors >> exitFailure
else do
let dimTables = map (\fact -> (fact, extractDimensionTables env fact)) facts
factTables = map (\fact -> (fact, extractFactTable env fact)) facts
dimTableDefnSQLs = [ tabDefnSQL table | (fact, tabs) <- dimTables
, table <- tabs
, table `notElem` tables ]
factTableDefnSQLs = [ tabDefnSQL table | (fact, table) <- factTables ]
mapM_ putStrLn dimTableDefnSQLs
mapM_ putStrLn factTableDefnSQLs
exitSuccess
where
toSQL = Text.unpack . flip Text.snoc ';'
tabDefnSQL = unlines . map toSQL . tableDefnSQL

56
app/Ringo/ArgParser.hs Normal file
View File

@ -0,0 +1,56 @@
module Ringo.ArgParser (ProgArgs(..), parseArgs) where
import qualified Data.Text as Text
import Data.List (intercalate)
import Options.Applicative
import Ringo.Types
data ProgArgs = ProgArgs
{ progSettings :: Settings
, progInputFile :: FilePath
, progOutputDir :: FilePath
} deriving (Eq, Show)
settingsParser :: Parser Settings
settingsParser = let Settings {..} = defSettings
in Settings
<$> (Text.pack <$> strOption (long "dim-prefix"
<> short 'd'
<> value (Text.unpack settingDimPrefix)
<> showDefault
<> help "Prefix for dimension tables"))
<*> (Text.pack <$> strOption (long "fact-prefix"
<> short 'f'
<> value (Text.unpack settingFactPrefix)
<> showDefault
<> help "Prefix for fact tables"))
<*> option auto (let timeunits = map show [Second ..]
in long "timeunit"
<> short 't'
<> value settingTimeUnit
<> showDefault
<> completeWith timeunits
<> help ("Time unit granularity for fact tables. Possible values: "
++ intercalate ", " timeunits))
progArgsParser :: Parser ProgArgs
progArgsParser =
ProgArgs
<$> settingsParser
<*> argument str (metavar "INPUT"
<> action "file"
<> help "Input file")
<*> argument str (metavar "OUTPUT"
<> action "directory"
<> help "Output directory")
parseArgs :: IO ProgArgs
parseArgs = execParser opts
where
opts = info (helper <*> progArgsParser)
(fullDesc
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
<> header "ringo - OLTP to OLAP schema transformer"
<> footer "Source: http://github.com/quintype/ringo")

76
app/Ringo/InputParser.hs Normal file
View File

@ -0,0 +1,76 @@
module Ringo.InputParser (parseInput) where
import qualified Data.Text as Text
import qualified Data.Vector as V
import Data.Maybe (fromMaybe)
import Data.Yaml hiding (Null)
import Data.Vector ((!), (!?))
import Ringo.Types
instance FromJSON Nullable where
parseJSON (String s) = case s of
"null" -> pure Null
"notnull" -> pure NotNull
_ -> fail $ "Invalid value for nullable: " ++ Text.unpack s
parseJSON o = fail $ "Cannot parse nullable: " ++ show o
instance FromJSON Column where
parseJSON (Array a) = if V.length a < 2
then fail $ "Column needs at least two elements: name and type"
else do
cName <- parseJSON $ a ! 0
cType <- parseJSON $ a ! 1
cNull <- parseJSON $ fromMaybe "null" (a !? 2)
return $ Column cName cType cNull
parseJSON o = fail $ "Cannot parse column: " ++ show o
instance FromJSON TableConstraint where
parseJSON (Object o) = do
cType <- o .: "type"
case cType of
"primary" -> PrimaryKey <$> o .: "column"
"unique" -> UniqueKey <$> o .: "columns"
"foreign" -> ForeignKey <$> o .: "table" <*> o .: "columns"
_ -> fail $ "Invalid constraint type: " ++ cType
parseJSON o = fail $ "Cannot parse constraint: " ++ show o
instance FromJSON Table where
parseJSON (Object o) = Table <$> o .: "name" <*> o .: "columns" <*> o .: "constraints"
parseJSON o = fail $ "Cannot parse table: " ++ show o
instance FromJSON FactColumn where
parseJSON (Object o) = do
cType <- o .: "type"
case cType of
"dimtime" -> DimTime <$> o .: "column"
"nodimid" -> NoDimId <$> o .: "column"
"dimid" -> DimId <$> o .: "table" <*> o .: "column"
"dimval" -> DimVal <$> o .: "table" <*> o .: "column"
"factcount" -> FactCount <$> o .: "column"
"factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column"
"factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column"
"factcountdistinct" -> FactCountDistinct <$> o .: "column"
_ -> fail $ "Invalid fact column type: " ++ cType
parseJSON o = fail $ "Cannot parse fact column: " ++ show o
instance FromJSON Fact where
parseJSON (Object o) = Fact <$> o .: "name"
<*> o .: "tablename"
<*> o .:? "parentfacts" .!= []
<*> o .: "columns"
parseJSON o = fail $ "Cannot parse fact: " ++ show o
data Input = Input [Table] [Fact] deriving (Eq, Show)
instance FromJSON Input where
parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts"
parseJSON o = fail $ "Cannot parse input: " ++ show o
parseInput :: FilePath -> IO (Either String ([Table], [Fact]))
parseInput file = do
result <- decodeFileEither file
return $ case result of
Left pe -> Left $ prettyPrintParseException pe
Right (Input tables facts) -> Right (tables, facts)

View File

@ -1,6 +1,6 @@
name: ringo
version: 0.1.0.0
synopsis: Tool to transform OLTP database schemas to OLAP database schemas automatically
synopsis: Tool to transform OLTP database schemas to OLAP database star schemas automatically
description: Please see README.md
homepage: http://github.com/quintype/ringo#readme
license: MIT
@ -15,11 +15,19 @@ cabal-version: >=1.20
library
hs-source-dirs: src
exposed-modules: Ringo
build-depends: base >= 4.7 && < 5
, text
, containers
, mtl
exposed-modules: Ringo,
Ringo.Extractor,
Ringo.Generator,
Ringo.Validator,
Ringo.Types
other-modules: Ringo.Extractor.Internal,
Ringo.Utils
build-depends: base >=4.7 && <5,
text >=1.2 && <1.3,
containers >=0.5 && <0.6,
mtl >=2.2 && <2.3
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables,
BangPatterns, TupleSections, NamedFieldPuns, GeneralizedNewtypeDeriving,
DeriveDataTypeable
@ -27,11 +35,19 @@ library
executable ringo
hs-source-dirs: app
other-modules: Ringo.ArgParser,
Ringo.InputParser
main-is: Main.hs
build-depends: base
, ringo
, pretty-show
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans
build-depends: base >=4.7 && <5,
text >=1.2 && <1.3,
optparse-applicative >=0.11 && <0.12,
yaml >=0.8 && <0.9,
vector >=0.10 && <0.11,
ringo
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables,
BangPatterns, TupleSections, NamedFieldPuns, GeneralizedNewtypeDeriving,
DeriveDataTypeable
default-language: Haskell2010
test-suite ringo-test

View File

@ -1,11 +1,36 @@
module Ringo
( module Ringo.Types
, module Ringo.Extractor
, module Ringo.Generator
, extractFactTable
, extractDimensionTables
, G.tableDefnSQL
, dimensionTableInsertSQL
, factTableInsertSQL
, validateTable
, validateFact
) where
import Ringo.Types
import Ringo.Extractor
import Ringo.Generator
-- import qualified Ringo.Tables as Tables
import Control.Monad.Reader (runReader)
import Data.Text (Text)
import Ringo.Types
import qualified Ringo.Extractor as E
import qualified Ringo.Generator as G
import qualified Ringo.Validator as V
extractFactTable :: Env -> Fact -> Table
extractFactTable env = flip runReader env . E.extractFactTable
extractDimensionTables :: Env -> Fact -> [Table]
extractDimensionTables env = flip runReader env . E.extractDimensionTables
dimensionTableInsertSQL :: Env -> Fact -> TableName -> Text
dimensionTableInsertSQL env fact = flip runReader env . G.dimensionTableInsertSQL fact
factTableInsertSQL :: Env -> Fact -> Text
factTableInsertSQL env = flip runReader env . G.factTableInsertSQL
validateTable :: Env -> Table -> [ValidationError]
validateTable env = flip runReader env . V.validateTable
validateFact :: Env -> Fact -> [ValidationError]
validateFact env = flip runReader env . V.validateFact

View File

@ -1,7 +1,7 @@
module Ringo.Types where
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text as Text
type ColumnName = Text
type ColumnType = Text
@ -27,10 +27,10 @@ data Table = Table
} deriving (Eq, Show)
data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show)
deriving (Eq, Enum, Show, Read)
timeUnitName :: TimeUnit -> Text
timeUnitName = T.toLower . T.pack . show
timeUnitName = Text.toLower . Text.pack . show
timeUnitToSeconds :: TimeUnit -> Int
timeUnitToSeconds Second = 1
@ -46,8 +46,6 @@ data Fact = Fact
, factColumns :: ![FactColumn]
} deriving (Eq, Show)
data FactValType = Count | Sum | Average | CountDistinct deriving (Eq, Enum, Show)
data FactColumn = DimTime !ColumnName
| NoDimId !ColumnName
| DimId !TableName !ColumnName

View File

@ -37,7 +37,7 @@ validateFact Fact {..} = do
Just table -> do
tableVs <- validateTable table
parentVs <- concat <$> mapM checkFactParents factParentNames
let colVs = concatMap (checkColumn table) factColumns
let colVs = concatMap (checkColumn tables table) factColumns
return $ tableVs ++ parentVs ++ colVs
where
checkFactParents fName = do
@ -46,7 +46,17 @@ validateFact Fact {..} = do
Nothing -> return [ MissingFact fName ]
Just pFact -> validateFact pFact
checkColumn table = maybe [] (checkTableForCol table) . factColumnName
checkColumn tables table factCol =
maybe [] (checkTableForCol table) (factColumnName factCol)
++ checkColumnTable tables factCol
checkColumnTable tables factCol = case factCol of
DimId tName _ -> go tName
_ -> []
where
go tName = case findTable tName tables of
Nothing -> [ MissingTable tName ]
Just _ -> []
withFactValidation :: Fact -> Reader Env a -> Reader Env (Either [ValidationError] a)
withFactValidation fact func = do