diff --git a/.gitignore b/.gitignore index c27dc0e..1f8e248 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ -src/Ringo/Tables.hs +Tables.hs +tables.yaml dist cabal-dev +.DS_STORE *.o *.hi *.chi diff --git a/app/Main.hs b/app/Main.hs index 83d03d8..46cb59f 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,6 +1,39 @@ module Main where +import qualified Data.Text as Text + +import Data.List (nub) +import System.Exit (exitFailure, exitSuccess) + import Ringo +import Ringo.ArgParser +import Ringo.InputParser main :: IO () -main = undefined +main = do + ProgArgs {..} <- parseArgs + result <- parseInput progInputFile + case result of + Left err -> putStrLn err >> exitFailure + Right (tables, facts) -> do + let env = Env tables facts progSettings + let errors = nub $ concatMap (validateTable env) tables ++ concatMap (validateFact env) facts + if not $ null errors + then mapM print errors >> exitFailure + else do + let dimTables = map (\fact -> (fact, extractDimensionTables env fact)) facts + factTables = map (\fact -> (fact, extractFactTable env fact)) facts + + dimTableDefnSQLs = [ tabDefnSQL table | (fact, tabs) <- dimTables + , table <- tabs + , table `notElem` tables ] + factTableDefnSQLs = [ tabDefnSQL table | (fact, table) <- factTables ] + + mapM_ putStrLn dimTableDefnSQLs + mapM_ putStrLn factTableDefnSQLs + + exitSuccess + where + toSQL = Text.unpack . flip Text.snoc ';' + tabDefnSQL = unlines . map toSQL . tableDefnSQL + diff --git a/app/Ringo/ArgParser.hs b/app/Ringo/ArgParser.hs new file mode 100644 index 0000000..e68c8be --- /dev/null +++ b/app/Ringo/ArgParser.hs @@ -0,0 +1,56 @@ +module Ringo.ArgParser (ProgArgs(..), parseArgs) where + +import qualified Data.Text as Text + +import Data.List (intercalate) +import Options.Applicative + +import Ringo.Types + +data ProgArgs = ProgArgs + { progSettings :: Settings + , progInputFile :: FilePath + , progOutputDir :: FilePath + } deriving (Eq, Show) + +settingsParser :: Parser Settings +settingsParser = let Settings {..} = defSettings + in Settings + <$> (Text.pack <$> strOption (long "dim-prefix" + <> short 'd' + <> value (Text.unpack settingDimPrefix) + <> showDefault + <> help "Prefix for dimension tables")) + <*> (Text.pack <$> strOption (long "fact-prefix" + <> short 'f' + <> value (Text.unpack settingFactPrefix) + <> showDefault + <> help "Prefix for fact tables")) + <*> option auto (let timeunits = map show [Second ..] + in long "timeunit" + <> short 't' + <> value settingTimeUnit + <> showDefault + <> completeWith timeunits + <> help ("Time unit granularity for fact tables. Possible values: " + ++ intercalate ", " timeunits)) + +progArgsParser :: Parser ProgArgs +progArgsParser = + ProgArgs + <$> settingsParser + <*> argument str (metavar "INPUT" + <> action "file" + <> help "Input file") + <*> argument str (metavar "OUTPUT" + <> action "directory" + <> help "Output directory") + +parseArgs :: IO ProgArgs +parseArgs = execParser opts + where + opts = info (helper <*> progArgsParser) + (fullDesc + <> progDesc "Transforms OLTP database schemas to OLAP database star schemas" + <> header "ringo - OLTP to OLAP schema transformer" + <> footer "Source: http://github.com/quintype/ringo") diff --git a/app/Ringo/InputParser.hs b/app/Ringo/InputParser.hs new file mode 100644 index 0000000..9d99923 --- /dev/null +++ b/app/Ringo/InputParser.hs @@ -0,0 +1,76 @@ +module Ringo.InputParser (parseInput) where + +import qualified Data.Text as Text +import qualified Data.Vector as V + +import Data.Maybe (fromMaybe) +import Data.Yaml hiding (Null) +import Data.Vector ((!), (!?)) +import Ringo.Types + +instance FromJSON Nullable where + parseJSON (String s) = case s of + "null" -> pure Null + "notnull" -> pure NotNull + _ -> fail $ "Invalid value for nullable: " ++ Text.unpack s + parseJSON o = fail $ "Cannot parse nullable: " ++ show o + +instance FromJSON Column where + parseJSON (Array a) = if V.length a < 2 + then fail $ "Column needs at least two elements: name and type" + else do + cName <- parseJSON $ a ! 0 + cType <- parseJSON $ a ! 1 + cNull <- parseJSON $ fromMaybe "null" (a !? 2) + return $ Column cName cType cNull + + parseJSON o = fail $ "Cannot parse column: " ++ show o + +instance FromJSON TableConstraint where + parseJSON (Object o) = do + cType <- o .: "type" + case cType of + "primary" -> PrimaryKey <$> o .: "column" + "unique" -> UniqueKey <$> o .: "columns" + "foreign" -> ForeignKey <$> o .: "table" <*> o .: "columns" + _ -> fail $ "Invalid constraint type: " ++ cType + parseJSON o = fail $ "Cannot parse constraint: " ++ show o + +instance FromJSON Table where + parseJSON (Object o) = Table <$> o .: "name" <*> o .: "columns" <*> o .: "constraints" + parseJSON o = fail $ "Cannot parse table: " ++ show o + +instance FromJSON FactColumn where + parseJSON (Object o) = do + cType <- o .: "type" + case cType of + "dimtime" -> DimTime <$> o .: "column" + "nodimid" -> NoDimId <$> o .: "column" + "dimid" -> DimId <$> o .: "table" <*> o .: "column" + "dimval" -> DimVal <$> o .: "table" <*> o .: "column" + "factcount" -> FactCount <$> o .: "column" + "factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column" + "factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column" + "factcountdistinct" -> FactCountDistinct <$> o .: "column" + _ -> fail $ "Invalid fact column type: " ++ cType + parseJSON o = fail $ "Cannot parse fact column: " ++ show o + +instance FromJSON Fact where + parseJSON (Object o) = Fact <$> o .: "name" + <*> o .: "tablename" + <*> o .:? "parentfacts" .!= [] + <*> o .: "columns" + parseJSON o = fail $ "Cannot parse fact: " ++ show o + +data Input = Input [Table] [Fact] deriving (Eq, Show) + +instance FromJSON Input where + parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" + parseJSON o = fail $ "Cannot parse input: " ++ show o + +parseInput :: FilePath -> IO (Either String ([Table], [Fact])) +parseInput file = do + result <- decodeFileEither file + return $ case result of + Left pe -> Left $ prettyPrintParseException pe + Right (Input tables facts) -> Right (tables, facts) diff --git a/ringo.cabal b/ringo.cabal index 532f758..0d0c1dc 100644 --- a/ringo.cabal +++ b/ringo.cabal @@ -1,6 +1,6 @@ name: ringo version: 0.1.0.0 -synopsis: Tool to transform OLTP database schemas to OLAP database schemas automatically +synopsis: Tool to transform OLTP database schemas to OLAP database star schemas automatically description: Please see README.md homepage: http://github.com/quintype/ringo#readme license: MIT @@ -15,11 +15,19 @@ cabal-version: >=1.20 library hs-source-dirs: src - exposed-modules: Ringo - build-depends: base >= 4.7 && < 5 - , text - , containers - , mtl + exposed-modules: Ringo, + Ringo.Extractor, + Ringo.Generator, + Ringo.Validator, + Ringo.Types + other-modules: Ringo.Extractor.Internal, + Ringo.Utils + + build-depends: base >=4.7 && <5, + text >=1.2 && <1.3, + containers >=0.5 && <0.6, + mtl >=2.2 && <2.3 + ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns, TupleSections, NamedFieldPuns, GeneralizedNewtypeDeriving, DeriveDataTypeable @@ -27,11 +35,19 @@ library executable ringo hs-source-dirs: app + other-modules: Ringo.ArgParser, + Ringo.InputParser main-is: Main.hs - build-depends: base - , ringo - , pretty-show - ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans + build-depends: base >=4.7 && <5, + text >=1.2 && <1.3, + optparse-applicative >=0.11 && <0.12, + yaml >=0.8 && <0.9, + vector >=0.10 && <0.11, + ringo + ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 + default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, + BangPatterns, TupleSections, NamedFieldPuns, GeneralizedNewtypeDeriving, + DeriveDataTypeable default-language: Haskell2010 test-suite ringo-test diff --git a/src/Ringo.hs b/src/Ringo.hs index 357f4cc..cdd53c0 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -1,11 +1,36 @@ module Ringo ( module Ringo.Types - , module Ringo.Extractor - , module Ringo.Generator + , extractFactTable + , extractDimensionTables + , G.tableDefnSQL + , dimensionTableInsertSQL + , factTableInsertSQL + , validateTable + , validateFact ) where -import Ringo.Types -import Ringo.Extractor -import Ringo.Generator --- import qualified Ringo.Tables as Tables +import Control.Monad.Reader (runReader) +import Data.Text (Text) +import Ringo.Types +import qualified Ringo.Extractor as E +import qualified Ringo.Generator as G +import qualified Ringo.Validator as V + +extractFactTable :: Env -> Fact -> Table +extractFactTable env = flip runReader env . E.extractFactTable + +extractDimensionTables :: Env -> Fact -> [Table] +extractDimensionTables env = flip runReader env . E.extractDimensionTables + +dimensionTableInsertSQL :: Env -> Fact -> TableName -> Text +dimensionTableInsertSQL env fact = flip runReader env . G.dimensionTableInsertSQL fact + +factTableInsertSQL :: Env -> Fact -> Text +factTableInsertSQL env = flip runReader env . G.factTableInsertSQL + +validateTable :: Env -> Table -> [ValidationError] +validateTable env = flip runReader env . V.validateTable + +validateFact :: Env -> Fact -> [ValidationError] +validateFact env = flip runReader env . V.validateFact diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index fb5ef12..fb9c97a 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -1,7 +1,7 @@ module Ringo.Types where import Data.Text (Text) -import qualified Data.Text as T +import qualified Data.Text as Text type ColumnName = Text type ColumnType = Text @@ -27,10 +27,10 @@ data Table = Table } deriving (Eq, Show) data TimeUnit = Second | Minute | Hour | Day | Week - deriving (Eq, Enum, Show) + deriving (Eq, Enum, Show, Read) timeUnitName :: TimeUnit -> Text -timeUnitName = T.toLower . T.pack . show +timeUnitName = Text.toLower . Text.pack . show timeUnitToSeconds :: TimeUnit -> Int timeUnitToSeconds Second = 1 @@ -46,8 +46,6 @@ data Fact = Fact , factColumns :: ![FactColumn] } deriving (Eq, Show) -data FactValType = Count | Sum | Average | CountDistinct deriving (Eq, Enum, Show) - data FactColumn = DimTime !ColumnName | NoDimId !ColumnName | DimId !TableName !ColumnName diff --git a/src/Ringo/Validator.hs b/src/Ringo/Validator.hs index 0a650fb..bc6bf2e 100644 --- a/src/Ringo/Validator.hs +++ b/src/Ringo/Validator.hs @@ -37,7 +37,7 @@ validateFact Fact {..} = do Just table -> do tableVs <- validateTable table parentVs <- concat <$> mapM checkFactParents factParentNames - let colVs = concatMap (checkColumn table) factColumns + let colVs = concatMap (checkColumn tables table) factColumns return $ tableVs ++ parentVs ++ colVs where checkFactParents fName = do @@ -46,7 +46,17 @@ validateFact Fact {..} = do Nothing -> return [ MissingFact fName ] Just pFact -> validateFact pFact - checkColumn table = maybe [] (checkTableForCol table) . factColumnName + checkColumn tables table factCol = + maybe [] (checkTableForCol table) (factColumnName factCol) + ++ checkColumnTable tables factCol + + checkColumnTable tables factCol = case factCol of + DimId tName _ -> go tName + _ -> [] + where + go tName = case findTable tName tables of + Nothing -> [ MissingTable tName ] + Just _ -> [] withFactValidation :: Fact -> Reader Env a -> Reader Env (Either [ValidationError] a) withFactValidation fact func = do