Adds output json generation.

- JSON files are generated for the following:
 - list of fact tables generated
 - list of dimension tables generated
 - dependencies between the fact, dimension and source tables
pull/1/head
Abhinav Sarkar 2015-12-21 15:30:23 +05:30
parent 7d64ffcde4
commit d1e1eb7676
8 changed files with 86 additions and 12 deletions

View File

@ -1,7 +1,10 @@
module Main where
import qualified Data.Text as Text
import qualified Data.ByteString.Lazy as BS
import qualified Data.Map as Map
import qualified Data.Text as Text
import Data.Aeson (encode)
import Data.Char (toLower)
import Data.List (nub)
import Data.Monoid ((<>))
@ -26,15 +29,28 @@ main = do
let env = Env tables facts progSettings
let errors = nub $ concatMap (validateTable env) tables ++ concatMap (validateFact env) facts
if not $ null errors
then mapM_ print errors >> exitFailure
else writeSQLFiles progOutputDir env >> exitSuccess
then mapM_ print errors >> exitFailure
else writeFiles progOutputDir env >> exitSuccess
writeFiles :: FilePath -> Env -> IO ()
writeFiles outputDir env@Env{..} = do
let Settings{..} = envSettings
forM_ sqls $ \(sqlType, table, sql) -> do
let dirName = outputDir </> map toLower (show sqlType)
createDirectoryIfMissing True dirName
writeFile (dirName </> Text.unpack table <.> "sql") sql
BS.writeFile (outputDir </> Text.unpack settingDependenciesJSONFileName)
. encode
. foldl (\acc -> Map.union acc . extractDependencies env) Map.empty
$ envFacts
BS.writeFile (outputDir </> Text.unpack settingFactsJSONFileName) . encode $
[ tableName table | (_, tabs) <- dimTables, table <- tabs , table `notElem` envTables ]
BS.writeFile (outputDir </> Text.unpack settingDimensionJSONFileName) . encode $
[ tableName table | (_, table) <- factTables ]
writeSQLFiles :: FilePath -> Env -> IO ()
writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
let dirName = outputDir </> map toLower (show sqlType)
fileName = dirName </> Text.unpack table <.> "sql"
createDirectoryIfMissing True dirName
writeFile fileName sql
where
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]

View File

@ -55,6 +55,15 @@ settingsParser = let Settings {..} = defSettings
<*> minorOption "fact-infix"
settingFactInfix
"Infix for fact tables"
<*> minorOption "dependencies-json-file"
settingDependenciesJSONFileName
"Name of the output dependencies json file"
<*> minorOption "facts-json-file"
settingFactsJSONFileName
"Name of the output facts json file"
<*> minorOption "dimensions-json-file"
settingDimensionJSONFileName
"Name of the output dimensions json file"
where
minorOption longDesc defValue helpTxt =
Text.pack <$> strOption (long longDesc

View File

@ -38,11 +38,14 @@ executable ringo
main-is: Main.hs
build-depends: base >=4.7 && <5,
text >=1.2 && <1.3,
bytestring >=0.10 && <0.11,
containers >=0.5 && <0.6,
optparse-applicative >=0.11 && <0.12,
yaml >=0.8 && <0.9,
vector >=0.10 && <0.11,
directory >=1.2 && <1.3,
filepath >=1.3 && <1.5,
aeson >=0.8 && <0.9,
ringo
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,

View File

@ -2,6 +2,7 @@ module Ringo
( module Ringo.Types
, extractFactTable
, extractDimensionTables
, extractDependencies
, G.tableDefnSQL
, factTableDefnSQL
, dimensionTablePopulateSQL
@ -24,6 +25,9 @@ extractFactTable env = flip runReader env . E.extractFactTable
extractDimensionTables :: Env -> Fact -> [Table]
extractDimensionTables env = flip runReader env . E.extractDimensionTables
extractDependencies :: Env -> Fact -> Dependencies
extractDependencies env = flip runReader env . E.extractDependencies
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact

View File

@ -2,11 +2,16 @@ module Ringo.Extractor
( extractDimensionTables
, extractAllDimensionTables
, extractFactTable
, extractDependencies
) where
import qualified Data.Map as Map
import qualified Data.Tree as Tree
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (fromJust)
import Data.Monoid ((<>))
import Data.List (nub)
import Ringo.Extractor.Internal
import Ringo.Types
@ -59,3 +64,30 @@ extractFactTable fact = do
, tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks
}
extractDependencies :: Fact -> Reader Env Dependencies
extractDependencies fact = do
settings@Settings{..} <- asks envSettings
facts <- asks envFacts
let factSourceDeps =
nub . Tree.flatten . flip Tree.unfoldTree fact $ \fct ->
(factTableName fct, parentFacts fct facts)
factDimDeps =
nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct ->
( forMaybe (factColumns fct) $ \col -> case col of
DimVal table _ -> Just $ settingDimPrefix <> table
DimId table _ -> Just table
_ -> Nothing
, parentFacts fct facts
)
dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact])
| DimVal table _ <- factColumns fact ]
factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps)
return $ Map.union dimDeps factDeps
where
extractedTable Settings {..} =
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
parentFacts fct facts = [ fromJust $ findFact pf facts | pf <- factParentNames fct ]

View File

@ -164,7 +164,7 @@ factTablePopulateSQL popMode fact = do
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
<> (Text.intercalate "\n AND " $ dimLookupWhereClauses)
<> Text.intercalate "\n AND " dimLookupWhereClauses
in (colName, insertSQL, True)
colMap = [ (cName, if addAs then asName cName sql else sql, addAs)

View File

@ -1,8 +1,10 @@
module Ringo.Types where
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Map (Map)
import Data.Text (Text)
type ColumnName = Text
type ColumnType = Text
type TableName = Text
@ -77,6 +79,9 @@ data Settings = Settings
, settingDimTableIdColumnType :: !Text
, settingFactCountColumnType :: !Text
, settingFactInfix :: !Text
, settingDependenciesJSONFileName :: !Text
, settingFactsJSONFileName :: !Text
, settingDimensionJSONFileName :: !Text
} deriving (Eq, Show)
defSettings :: Settings
@ -91,6 +96,9 @@ defSettings = Settings
, settingDimTableIdColumnType = "serial"
, settingFactCountColumnType = "integer"
, settingFactInfix = "_by_"
, settingDependenciesJSONFileName = "dependencies.json"
, settingFactsJSONFileName = "facts.json"
, settingDimensionJSONFileName = "dimensions.json"
}
data ValidationError = MissingTable !TableName
@ -106,3 +114,5 @@ data Env = Env
} deriving (Eq, Show)
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)
type Dependencies = Map TableName [TableName]

View File

@ -1,7 +1,7 @@
# For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md
# Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2)
resolver: lts-3.6
resolver: lts-3.19
# Local packages, usually specified by relative directory name
packages: