Adds incremental refresh sql generation for dimension tables.
- Adds partial unique indexes for tables to handle null columns. - Adds validation for time column in fact tables.pull/1/head
parent
e46d3684cd
commit
7dc6db944f
29
app/Main.hs
29
app/Main.hs
|
@ -14,7 +14,7 @@ import Ringo
|
||||||
import Ringo.ArgParser
|
import Ringo.ArgParser
|
||||||
import Ringo.InputParser
|
import Ringo.InputParser
|
||||||
|
|
||||||
data SQLType = Create | Populate | Update deriving (Eq, Show)
|
data SQLType = Create | FullRefresh | IncRefresh deriving (Eq, Show)
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
|
@ -36,30 +36,29 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
|
||||||
createDirectoryIfMissing True dirName
|
createDirectoryIfMissing True dirName
|
||||||
writeFile fileName sql
|
writeFile fileName sql
|
||||||
where
|
where
|
||||||
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
|
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
|
||||||
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
|
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
|
||||||
|
|
||||||
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
|
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
|
||||||
| (_, tabs) <- dimTables
|
| (_, tabs) <- dimTables
|
||||||
, table <- tabs
|
, table <- tabs
|
||||||
, table `notElem` envTables ]
|
, table `notElem` envTables ]
|
||||||
factTableDefnSQLs = [ (Create
|
|
||||||
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
factTableDefnSQLs = [ (Create , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
||||||
| (fact, table) <- factTables ]
|
| (fact, table) <- factTables ]
|
||||||
|
|
||||||
dimTableInsertSQLs = [ (Populate
|
dimTablePopulateSQLs typ gen = [ (typ , tableName table, sqlStr $ gen env fact (tableName table))
|
||||||
, tableName table
|
| (fact, tabs) <- dimTables
|
||||||
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
|
, table <- tabs
|
||||||
| (fact, tabs) <- dimTables
|
, table `notElem` envTables ]
|
||||||
, table <- tabs
|
|
||||||
, table `notElem` envTables ]
|
|
||||||
|
|
||||||
factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
|
factTableInsertSQLs = [ (FullRefresh, tableName table, sqlStr $ factTableInsertSQL env fact)
|
||||||
| (fact, table) <- factTables ]
|
| (fact, table) <- factTables ]
|
||||||
|
|
||||||
sqls = concat [ dimTableDefnSQLs
|
sqls = concat [ dimTableDefnSQLs
|
||||||
, factTableDefnSQLs
|
, factTableDefnSQLs
|
||||||
, dimTableInsertSQLs
|
, dimTablePopulateSQLs FullRefresh $ dimensionTablePopulateSQL FullPopulation
|
||||||
|
, dimTablePopulateSQLs IncRefresh $ dimensionTablePopulateSQL IncrementalPopulation
|
||||||
, factTableInsertSQLs
|
, factTableInsertSQLs
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ module Ringo
|
||||||
, extractDimensionTables
|
, extractDimensionTables
|
||||||
, G.tableDefnSQL
|
, G.tableDefnSQL
|
||||||
, factTableDefnSQL
|
, factTableDefnSQL
|
||||||
, dimensionTableInsertSQL
|
, dimensionTablePopulateSQL
|
||||||
, factTableInsertSQL
|
, factTableInsertSQL
|
||||||
, validateTable
|
, validateTable
|
||||||
, validateFact
|
, validateFact
|
||||||
|
@ -27,8 +27,9 @@ extractDimensionTables env = flip runReader env . E.extractDimensionTables
|
||||||
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
|
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
|
||||||
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
|
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
|
||||||
|
|
||||||
dimensionTableInsertSQL :: Env -> Fact -> TableName -> Text
|
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
|
||||||
dimensionTableInsertSQL env fact = flip runReader env . G.dimensionTableInsertSQL fact
|
dimensionTablePopulateSQL popMode env fact =
|
||||||
|
flip runReader env . G.dimensionTablePopulateSQL popMode fact
|
||||||
|
|
||||||
factTableInsertSQL :: Env -> Fact -> Text
|
factTableInsertSQL :: Env -> Fact -> Text
|
||||||
factTableInsertSQL env = flip runReader env . G.factTableInsertSQL
|
factTableInsertSQL env = flip runReader env . G.factTableInsertSQL
|
||||||
|
|
|
@ -10,7 +10,7 @@ import Control.Applicative ((<$>))
|
||||||
|
|
||||||
import Control.Monad.Reader (Reader, asks)
|
import Control.Monad.Reader (Reader, asks)
|
||||||
import Data.Function (on)
|
import Data.Function (on)
|
||||||
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
|
import Data.Maybe (mapMaybe, fromMaybe, fromJust, catMaybes)
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.List (nub, nubBy)
|
import Data.List (nub, nubBy)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
@ -48,10 +48,7 @@ extractDimensionTables fact = do
|
||||||
let table = fromJust . findTable (factTableName fact) $ tables
|
let table = fromJust . findTable (factTableName fact) $ tables
|
||||||
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
|
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
|
||||||
where
|
where
|
||||||
dimsFromIds tables =
|
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ]
|
||||||
forMaybe (factColumns fact) $ \fcol -> case fcol of
|
|
||||||
DimId d _ -> findTable d tables
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
dimsFromVals Settings {..} tableColumns =
|
dimsFromVals Settings {..} tableColumns =
|
||||||
map (\(dim, cols) ->
|
map (\(dim, cols) ->
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
module Ringo.Generator
|
module Ringo.Generator
|
||||||
( tableDefnSQL
|
( tableDefnSQL
|
||||||
, factTableDefnSQL
|
, factTableDefnSQL
|
||||||
, dimensionTableInsertSQL
|
, dimensionTablePopulateSQL
|
||||||
, factTableInsertSQL
|
, factTableInsertSQL
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
@ -13,9 +13,10 @@ import Control.Applicative ((<$>))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
import Control.Monad.Reader (Reader, asks)
|
import Control.Monad.Reader (Reader, asks)
|
||||||
import Data.List (intersperse, nub, find)
|
import Data.List (nub, find, subsequences, partition, sortBy)
|
||||||
import Data.Maybe (fromJust, mapMaybe)
|
import Data.Maybe (fromJust, mapMaybe, catMaybes)
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
|
import Data.Ord (comparing)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
|
||||||
import Ringo.Extractor.Internal
|
import Ringo.Extractor.Internal
|
||||||
|
@ -31,24 +32,44 @@ columnDefnSQL Column {..} =
|
||||||
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
|
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
|
||||||
|
|
||||||
colNamesString :: [ColumnName] -> Text
|
colNamesString :: [ColumnName] -> Text
|
||||||
colNamesString cNames = Text.concat (intersperse ", " cNames)
|
colNamesString = Text.intercalate ", "
|
||||||
|
|
||||||
constraintDefnSQL :: TableName -> TableConstraint -> Text
|
fullColName :: TableName -> ColumnName -> ColumnName
|
||||||
constraintDefnSQL tableName constraint =
|
fullColName tName cName = tName <> "." <> cName
|
||||||
"ALTER TABLE ONLY " <> tableName <> " ADD "
|
|
||||||
<> case constraint of
|
constraintDefnSQL :: Table -> TableConstraint -> [Text]
|
||||||
PrimaryKey cName -> "PRIMARY KEY (" <> cName <> ")"
|
constraintDefnSQL Table {..} constraint =
|
||||||
UniqueKey cNames -> "UNIQUE (" <> colNamesString cNames <> ")"
|
let alterTableSQL = "ALTER TABLE ONLY " <> tableName <> " ADD "
|
||||||
ForeignKey oTableName cNamePairs ->
|
in case constraint of
|
||||||
"FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
|
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
|
||||||
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")"
|
ForeignKey oTableName cNamePairs ->
|
||||||
|
[ alterTableSQL <> "FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
|
||||||
|
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")" ]
|
||||||
|
UniqueKey cNames -> let
|
||||||
|
(notNullCols, nullCols) =
|
||||||
|
both (map columnName)
|
||||||
|
$ partition ((== NotNull) . columnNullable)
|
||||||
|
$ catMaybes [ findColumn cName tableColumns | cName <- cNames ]
|
||||||
|
combinations =
|
||||||
|
map (\cs -> (cs, [ c | c <- nullCols, c `notElem` cs ]))
|
||||||
|
. sortBy (comparing length)
|
||||||
|
$ subsequences nullCols
|
||||||
|
in [ "CREATE UNIQUE INDEX ON " <> tableName
|
||||||
|
<> " (" <> colNamesString (notNullCols ++ nnCols) <> ")"
|
||||||
|
<> if null whereClauses
|
||||||
|
then ""
|
||||||
|
else "\nWHERE "<> Text.intercalate "\nAND " whereClauses
|
||||||
|
| (nnCols, nCols) <- combinations
|
||||||
|
, not $ null (notNullCols ++ nnCols)
|
||||||
|
, let whereClauses =
|
||||||
|
[ c <> " IS NOT NULL" | c <- nnCols ] ++ [ c <> " IS NULL" | c <- nCols ] ]
|
||||||
|
|
||||||
tableDefnSQL :: Table -> [Text]
|
tableDefnSQL :: Table -> [Text]
|
||||||
tableDefnSQL Table {..} =
|
tableDefnSQL table@Table {..} =
|
||||||
tableSQL : map (constraintDefnSQL tableName) tableConstraints
|
tableSQL : concatMap (constraintDefnSQL table) tableConstraints
|
||||||
where
|
where
|
||||||
tableSQL = "CREATE TABLE " <> tableName <> " (\n"
|
tableSQL = "CREATE TABLE " <> tableName <> " (\n"
|
||||||
<> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns)
|
<> (Text.intercalate ",\n" . map columnDefnSQL $ tableColumns)
|
||||||
<> "\n)"
|
<> "\n)"
|
||||||
|
|
||||||
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
|
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
|
||||||
|
@ -71,21 +92,33 @@ factTableDefnSQL fact table = do
|
||||||
|
|
||||||
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
|
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
|
||||||
dimColumnMapping dimPrefix fact dimTableName =
|
dimColumnMapping dimPrefix fact dimTableName =
|
||||||
forMaybe (factColumns fact) $ \fCol -> case fCol of
|
[ (dimColumnName dName cName, cName)
|
||||||
DimVal dName cName | dimPrefix <> dName == dimTableName ->
|
| DimVal dName cName <- factColumns fact , dimPrefix <> dName == dimTableName]
|
||||||
Just (dimColumnName dName cName, cName)
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
|
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
|
||||||
dimensionTableInsertSQL fact dimTableName = do
|
dimensionTablePopulateSQL popMode fact dimTableName = do
|
||||||
dimPrefix <- settingDimPrefix <$> asks envSettings
|
dimPrefix <- settingDimPrefix <$> asks envSettings
|
||||||
let colMapping = dimColumnMapping dimPrefix fact dimTableName
|
let colMapping = dimColumnMapping dimPrefix fact dimTableName
|
||||||
|
baseSelectC = "SELECT DISTINCT\n" <> colNamesString (map snd colMapping) <> "\n"
|
||||||
return $ "INSERT INTO " <> dimTableName <> " (\n"
|
<> "FROM " <> factTableName fact
|
||||||
<> colNamesString (map fst colMapping)
|
insertC selectC = "INSERT INTO " <> dimTableName
|
||||||
<> "\n) SELECT DISTINCT \n"
|
<> " (\n" <> colNamesString (map fst colMapping) <> "\n) "
|
||||||
<> colNamesString (map snd colMapping)
|
<> "SELECT x.* FROM (\n" <> selectC <> ") x"
|
||||||
<> "\nFROM " <> factTableName fact
|
timeCol = head [ cName | DimTime cName <- factColumns fact ]
|
||||||
|
return $ case popMode of
|
||||||
|
FullPopulation -> insertC baseSelectC
|
||||||
|
IncrementalPopulation ->
|
||||||
|
insertC (baseSelectC <> "\nWHERE "
|
||||||
|
<> timeCol <> " > ? AND " <> timeCol <> " <= ?"
|
||||||
|
<> " AND (\n"
|
||||||
|
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
|
||||||
|
<> "\n)")
|
||||||
|
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
|
||||||
|
<> Text.intercalate " \nAND "
|
||||||
|
[ fullColName dimTableName c1 <> " IS NOT DISTINCT FROM " <> fullColName "x" c2
|
||||||
|
| (c1, c2) <- colMapping ]
|
||||||
|
<> "\nWHERE " <> Text.intercalate " \nAND "
|
||||||
|
[ fullColName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
|
||||||
|
|
||||||
factTableInsertSQL :: Fact -> Reader Env Text
|
factTableInsertSQL :: Fact -> Reader Env Text
|
||||||
factTableInsertSQL fact = do
|
factTableInsertSQL fact = do
|
||||||
|
@ -131,7 +164,7 @@ factTableInsertSQL fact = do
|
||||||
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
|
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
|
||||||
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
|
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
|
||||||
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
|
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
|
||||||
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
|
<> (Text.intercalate "\n AND " $ dimLookupWhereClauses)
|
||||||
in (colName, insertSQL, True)
|
in (colName, insertSQL, True)
|
||||||
|
|
||||||
colMap = [ (cName, if addAs then asName cName sql else sql, addAs)
|
colMap = [ (cName, if addAs then asName cName sql else sql, addAs)
|
||||||
|
@ -147,18 +180,17 @@ factTableInsertSQL fact = do
|
||||||
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||||
<> " (\n" <> unlineCols (map fst3 colMap) <> "\n)"
|
<> " (\n" <> unlineCols (map fst3 colMap) <> "\n)"
|
||||||
<> "\nSELECT \n" <> unlineCols (map snd3 colMap)
|
<> "\nSELECT \n" <> unlineCols (map snd3 colMap)
|
||||||
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
|
<> "\nFROM " <> fTableName <> "\n" <> Text.intercalate"\n" joinClauses
|
||||||
<> "\nGROUP BY \n"
|
<> "\nGROUP BY \n"
|
||||||
<> unlineCols (map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap)
|
<> unlineCols (map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap)
|
||||||
where
|
where
|
||||||
groupByColPrefix = "xxff_"
|
groupByColPrefix = "xxff_"
|
||||||
fullColName tName cName = tName <> "." <> cName
|
|
||||||
asName cName sql = "(" <> sql <> ")" <> " as " <> groupByColPrefix <> cName
|
asName cName sql = "(" <> sql <> ")" <> " as " <> groupByColPrefix <> cName
|
||||||
unlineCols = Text.concat . intersperse ",\n "
|
unlineCols = Text.intercalate ",\n "
|
||||||
|
|
||||||
joinClausePreds table oTableName =
|
joinClausePreds table oTableName =
|
||||||
fmap (\(ForeignKey _ colPairs) ->
|
fmap (\(ForeignKey _ colPairs) ->
|
||||||
Text.concat . intersperse " AND "
|
Text.intercalate " AND "
|
||||||
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
|
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
|
||||||
$ colPairs )
|
$ colPairs )
|
||||||
. find (\cons -> case cons of
|
. find (\cons -> case cons of
|
||||||
|
|
|
@ -93,9 +93,10 @@ defSettings = Settings
|
||||||
, settingFactInfix = "_by_"
|
, settingFactInfix = "_by_"
|
||||||
}
|
}
|
||||||
|
|
||||||
data ValidationError = MissingTable !TableName
|
data ValidationError = MissingTable !TableName
|
||||||
| MissingFact !TableName
|
| MissingFact !TableName
|
||||||
| MissingColumn !TableName !ColumnName
|
| MissingColumn !TableName !ColumnName
|
||||||
|
| MissingTimeColumn !TableName
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
data Env = Env
|
data Env = Env
|
||||||
|
@ -103,3 +104,5 @@ data Env = Env
|
||||||
, envFacts :: ![Fact]
|
, envFacts :: ![Fact]
|
||||||
, envSettings :: !Settings
|
, envSettings :: !Settings
|
||||||
} deriving (Eq, Show)
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
module Ringo.Validator
|
module Ringo.Validator
|
||||||
( validateTable
|
( validateTable
|
||||||
, validateFact
|
, validateFact
|
||||||
, withFactValidation
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
#if MIN_VERSION_base(4,8,0)
|
#if MIN_VERSION_base(4,8,0)
|
||||||
|
@ -40,10 +39,12 @@ validateFact Fact {..} = do
|
||||||
case findTable factTableName tables of
|
case findTable factTableName tables of
|
||||||
Nothing -> return [ MissingTable factTableName ]
|
Nothing -> return [ MissingTable factTableName ]
|
||||||
Just table -> do
|
Just table -> do
|
||||||
tableVs <- validateTable table
|
tableVs <- validateTable table
|
||||||
parentVs <- concat <$> mapM checkFactParents factParentNames
|
parentVs <- concat <$> mapM checkFactParents factParentNames
|
||||||
let colVs = concatMap (checkColumn tables table) factColumns
|
let colVs = concatMap (checkColumn tables table) factColumns
|
||||||
return $ tableVs ++ parentVs ++ colVs
|
let timeVs = [ MissingTimeColumn factTableName
|
||||||
|
| null [ c | DimTime c <- factColumns ] ]
|
||||||
|
return $ tableVs ++ parentVs ++ colVs ++ timeVs
|
||||||
where
|
where
|
||||||
checkFactParents fName = do
|
checkFactParents fName = do
|
||||||
facts <- asks envFacts
|
facts <- asks envFacts
|
||||||
|
@ -58,10 +59,3 @@ validateFact Fact {..} = do
|
||||||
checkColumnTable tables factCol = case factCol of
|
checkColumnTable tables factCol = case factCol of
|
||||||
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
|
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
|
||||||
_ -> []
|
_ -> []
|
||||||
|
|
||||||
withFactValidation :: Fact -> Reader Env a -> Reader Env (Either [ValidationError] a)
|
|
||||||
withFactValidation fact func = do
|
|
||||||
errors <- validateFact fact
|
|
||||||
if not $ null errors
|
|
||||||
then return $ Left errors
|
|
||||||
else fmap Right func
|
|
||||||
|
|
Loading…
Reference in New Issue