Adds incremental refresh sql generation for dimension tables.
- Adds partial unique indexes for tables to handle null columns. - Adds validation for time column in fact tables.
This commit is contained in:
parent
e46d3684cd
commit
7dc6db944f
29
app/Main.hs
29
app/Main.hs
@ -14,7 +14,7 @@ import Ringo
|
||||
import Ringo.ArgParser
|
||||
import Ringo.InputParser
|
||||
|
||||
data SQLType = Create | Populate | Update deriving (Eq, Show)
|
||||
data SQLType = Create | FullRefresh | IncRefresh deriving (Eq, Show)
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
@ -36,30 +36,29 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
|
||||
createDirectoryIfMissing True dirName
|
||||
writeFile fileName sql
|
||||
where
|
||||
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
|
||||
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
|
||||
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
|
||||
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
|
||||
|
||||
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
|
||||
| (_, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
| (_, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
factTableDefnSQLs = [ (Create
|
||||
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
||||
|
||||
factTableDefnSQLs = [ (Create , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
||||
| (fact, table) <- factTables ]
|
||||
|
||||
dimTableInsertSQLs = [ (Populate
|
||||
, tableName table
|
||||
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
|
||||
| (fact, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
dimTablePopulateSQLs typ gen = [ (typ , tableName table, sqlStr $ gen env fact (tableName table))
|
||||
| (fact, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
|
||||
factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
|
||||
factTableInsertSQLs = [ (FullRefresh, tableName table, sqlStr $ factTableInsertSQL env fact)
|
||||
| (fact, table) <- factTables ]
|
||||
|
||||
sqls = concat [ dimTableDefnSQLs
|
||||
, factTableDefnSQLs
|
||||
, dimTableInsertSQLs
|
||||
, dimTablePopulateSQLs FullRefresh $ dimensionTablePopulateSQL FullPopulation
|
||||
, dimTablePopulateSQLs IncRefresh $ dimensionTablePopulateSQL IncrementalPopulation
|
||||
, factTableInsertSQLs
|
||||
]
|
||||
|
||||
|
@ -4,7 +4,7 @@ module Ringo
|
||||
, extractDimensionTables
|
||||
, G.tableDefnSQL
|
||||
, factTableDefnSQL
|
||||
, dimensionTableInsertSQL
|
||||
, dimensionTablePopulateSQL
|
||||
, factTableInsertSQL
|
||||
, validateTable
|
||||
, validateFact
|
||||
@ -27,8 +27,9 @@ extractDimensionTables env = flip runReader env . E.extractDimensionTables
|
||||
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
|
||||
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
|
||||
|
||||
dimensionTableInsertSQL :: Env -> Fact -> TableName -> Text
|
||||
dimensionTableInsertSQL env fact = flip runReader env . G.dimensionTableInsertSQL fact
|
||||
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
|
||||
dimensionTablePopulateSQL popMode env fact =
|
||||
flip runReader env . G.dimensionTablePopulateSQL popMode fact
|
||||
|
||||
factTableInsertSQL :: Env -> Fact -> Text
|
||||
factTableInsertSQL env = flip runReader env . G.factTableInsertSQL
|
||||
|
@ -10,7 +10,7 @@ import Control.Applicative ((<$>))
|
||||
|
||||
import Control.Monad.Reader (Reader, asks)
|
||||
import Data.Function (on)
|
||||
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
|
||||
import Data.Maybe (mapMaybe, fromMaybe, fromJust, catMaybes)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.List (nub, nubBy)
|
||||
import Data.Text (Text)
|
||||
@ -48,10 +48,7 @@ extractDimensionTables fact = do
|
||||
let table = fromJust . findTable (factTableName fact) $ tables
|
||||
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
|
||||
where
|
||||
dimsFromIds tables =
|
||||
forMaybe (factColumns fact) $ \fcol -> case fcol of
|
||||
DimId d _ -> findTable d tables
|
||||
_ -> Nothing
|
||||
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ]
|
||||
|
||||
dimsFromVals Settings {..} tableColumns =
|
||||
map (\(dim, cols) ->
|
||||
|
@ -1,7 +1,7 @@
|
||||
module Ringo.Generator
|
||||
( tableDefnSQL
|
||||
, factTableDefnSQL
|
||||
, dimensionTableInsertSQL
|
||||
, dimensionTablePopulateSQL
|
||||
, factTableInsertSQL
|
||||
) where
|
||||
|
||||
@ -13,9 +13,10 @@ import Control.Applicative ((<$>))
|
||||
#endif
|
||||
|
||||
import Control.Monad.Reader (Reader, asks)
|
||||
import Data.List (intersperse, nub, find)
|
||||
import Data.Maybe (fromJust, mapMaybe)
|
||||
import Data.List (nub, find, subsequences, partition, sortBy)
|
||||
import Data.Maybe (fromJust, mapMaybe, catMaybes)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.Ord (comparing)
|
||||
import Data.Text (Text)
|
||||
|
||||
import Ringo.Extractor.Internal
|
||||
@ -31,24 +32,44 @@ columnDefnSQL Column {..} =
|
||||
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
|
||||
|
||||
colNamesString :: [ColumnName] -> Text
|
||||
colNamesString cNames = Text.concat (intersperse ", " cNames)
|
||||
colNamesString = Text.intercalate ", "
|
||||
|
||||
constraintDefnSQL :: TableName -> TableConstraint -> Text
|
||||
constraintDefnSQL tableName constraint =
|
||||
"ALTER TABLE ONLY " <> tableName <> " ADD "
|
||||
<> case constraint of
|
||||
PrimaryKey cName -> "PRIMARY KEY (" <> cName <> ")"
|
||||
UniqueKey cNames -> "UNIQUE (" <> colNamesString cNames <> ")"
|
||||
ForeignKey oTableName cNamePairs ->
|
||||
"FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
|
||||
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")"
|
||||
fullColName :: TableName -> ColumnName -> ColumnName
|
||||
fullColName tName cName = tName <> "." <> cName
|
||||
|
||||
constraintDefnSQL :: Table -> TableConstraint -> [Text]
|
||||
constraintDefnSQL Table {..} constraint =
|
||||
let alterTableSQL = "ALTER TABLE ONLY " <> tableName <> " ADD "
|
||||
in case constraint of
|
||||
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
|
||||
ForeignKey oTableName cNamePairs ->
|
||||
[ alterTableSQL <> "FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
|
||||
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")" ]
|
||||
UniqueKey cNames -> let
|
||||
(notNullCols, nullCols) =
|
||||
both (map columnName)
|
||||
$ partition ((== NotNull) . columnNullable)
|
||||
$ catMaybes [ findColumn cName tableColumns | cName <- cNames ]
|
||||
combinations =
|
||||
map (\cs -> (cs, [ c | c <- nullCols, c `notElem` cs ]))
|
||||
. sortBy (comparing length)
|
||||
$ subsequences nullCols
|
||||
in [ "CREATE UNIQUE INDEX ON " <> tableName
|
||||
<> " (" <> colNamesString (notNullCols ++ nnCols) <> ")"
|
||||
<> if null whereClauses
|
||||
then ""
|
||||
else "\nWHERE "<> Text.intercalate "\nAND " whereClauses
|
||||
| (nnCols, nCols) <- combinations
|
||||
, not $ null (notNullCols ++ nnCols)
|
||||
, let whereClauses =
|
||||
[ c <> " IS NOT NULL" | c <- nnCols ] ++ [ c <> " IS NULL" | c <- nCols ] ]
|
||||
|
||||
tableDefnSQL :: Table -> [Text]
|
||||
tableDefnSQL Table {..} =
|
||||
tableSQL : map (constraintDefnSQL tableName) tableConstraints
|
||||
tableDefnSQL table@Table {..} =
|
||||
tableSQL : concatMap (constraintDefnSQL table) tableConstraints
|
||||
where
|
||||
tableSQL = "CREATE TABLE " <> tableName <> " (\n"
|
||||
<> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns)
|
||||
<> (Text.intercalate ",\n" . map columnDefnSQL $ tableColumns)
|
||||
<> "\n)"
|
||||
|
||||
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
|
||||
@ -71,21 +92,33 @@ factTableDefnSQL fact table = do
|
||||
|
||||
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
|
||||
dimColumnMapping dimPrefix fact dimTableName =
|
||||
forMaybe (factColumns fact) $ \fCol -> case fCol of
|
||||
DimVal dName cName | dimPrefix <> dName == dimTableName ->
|
||||
Just (dimColumnName dName cName, cName)
|
||||
_ -> Nothing
|
||||
[ (dimColumnName dName cName, cName)
|
||||
| DimVal dName cName <- factColumns fact , dimPrefix <> dName == dimTableName]
|
||||
|
||||
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
|
||||
dimensionTableInsertSQL fact dimTableName = do
|
||||
dimPrefix <- settingDimPrefix <$> asks envSettings
|
||||
let colMapping = dimColumnMapping dimPrefix fact dimTableName
|
||||
|
||||
return $ "INSERT INTO " <> dimTableName <> " (\n"
|
||||
<> colNamesString (map fst colMapping)
|
||||
<> "\n) SELECT DISTINCT \n"
|
||||
<> colNamesString (map snd colMapping)
|
||||
<> "\nFROM " <> factTableName fact
|
||||
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
|
||||
dimensionTablePopulateSQL popMode fact dimTableName = do
|
||||
dimPrefix <- settingDimPrefix <$> asks envSettings
|
||||
let colMapping = dimColumnMapping dimPrefix fact dimTableName
|
||||
baseSelectC = "SELECT DISTINCT\n" <> colNamesString (map snd colMapping) <> "\n"
|
||||
<> "FROM " <> factTableName fact
|
||||
insertC selectC = "INSERT INTO " <> dimTableName
|
||||
<> " (\n" <> colNamesString (map fst colMapping) <> "\n) "
|
||||
<> "SELECT x.* FROM (\n" <> selectC <> ") x"
|
||||
timeCol = head [ cName | DimTime cName <- factColumns fact ]
|
||||
return $ case popMode of
|
||||
FullPopulation -> insertC baseSelectC
|
||||
IncrementalPopulation ->
|
||||
insertC (baseSelectC <> "\nWHERE "
|
||||
<> timeCol <> " > ? AND " <> timeCol <> " <= ?"
|
||||
<> " AND (\n"
|
||||
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
|
||||
<> "\n)")
|
||||
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
|
||||
<> Text.intercalate " \nAND "
|
||||
[ fullColName dimTableName c1 <> " IS NOT DISTINCT FROM " <> fullColName "x" c2
|
||||
| (c1, c2) <- colMapping ]
|
||||
<> "\nWHERE " <> Text.intercalate " \nAND "
|
||||
[ fullColName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
|
||||
|
||||
factTableInsertSQL :: Fact -> Reader Env Text
|
||||
factTableInsertSQL fact = do
|
||||
@ -131,7 +164,7 @@ factTableInsertSQL fact = do
|
||||
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
|
||||
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
|
||||
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
|
||||
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
|
||||
<> (Text.intercalate "\n AND " $ dimLookupWhereClauses)
|
||||
in (colName, insertSQL, True)
|
||||
|
||||
colMap = [ (cName, if addAs then asName cName sql else sql, addAs)
|
||||
@ -147,18 +180,17 @@ factTableInsertSQL fact = do
|
||||
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||
<> " (\n" <> unlineCols (map fst3 colMap) <> "\n)"
|
||||
<> "\nSELECT \n" <> unlineCols (map snd3 colMap)
|
||||
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
|
||||
<> "\nFROM " <> fTableName <> "\n" <> Text.intercalate"\n" joinClauses
|
||||
<> "\nGROUP BY \n"
|
||||
<> unlineCols (map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap)
|
||||
where
|
||||
groupByColPrefix = "xxff_"
|
||||
fullColName tName cName = tName <> "." <> cName
|
||||
asName cName sql = "(" <> sql <> ")" <> " as " <> groupByColPrefix <> cName
|
||||
unlineCols = Text.concat . intersperse ",\n "
|
||||
unlineCols = Text.intercalate ",\n "
|
||||
|
||||
joinClausePreds table oTableName =
|
||||
fmap (\(ForeignKey _ colPairs) ->
|
||||
Text.concat . intersperse " AND "
|
||||
Text.intercalate " AND "
|
||||
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
|
||||
$ colPairs )
|
||||
. find (\cons -> case cons of
|
||||
|
@ -93,9 +93,10 @@ defSettings = Settings
|
||||
, settingFactInfix = "_by_"
|
||||
}
|
||||
|
||||
data ValidationError = MissingTable !TableName
|
||||
| MissingFact !TableName
|
||||
| MissingColumn !TableName !ColumnName
|
||||
data ValidationError = MissingTable !TableName
|
||||
| MissingFact !TableName
|
||||
| MissingColumn !TableName !ColumnName
|
||||
| MissingTimeColumn !TableName
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Env = Env
|
||||
@ -103,3 +104,5 @@ data Env = Env
|
||||
, envFacts :: ![Fact]
|
||||
, envSettings :: !Settings
|
||||
} deriving (Eq, Show)
|
||||
|
||||
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)
|
||||
|
@ -1,7 +1,6 @@
|
||||
module Ringo.Validator
|
||||
( validateTable
|
||||
, validateFact
|
||||
, withFactValidation
|
||||
) where
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
@ -40,10 +39,12 @@ validateFact Fact {..} = do
|
||||
case findTable factTableName tables of
|
||||
Nothing -> return [ MissingTable factTableName ]
|
||||
Just table -> do
|
||||
tableVs <- validateTable table
|
||||
parentVs <- concat <$> mapM checkFactParents factParentNames
|
||||
let colVs = concatMap (checkColumn tables table) factColumns
|
||||
return $ tableVs ++ parentVs ++ colVs
|
||||
tableVs <- validateTable table
|
||||
parentVs <- concat <$> mapM checkFactParents factParentNames
|
||||
let colVs = concatMap (checkColumn tables table) factColumns
|
||||
let timeVs = [ MissingTimeColumn factTableName
|
||||
| null [ c | DimTime c <- factColumns ] ]
|
||||
return $ tableVs ++ parentVs ++ colVs ++ timeVs
|
||||
where
|
||||
checkFactParents fName = do
|
||||
facts <- asks envFacts
|
||||
@ -58,10 +59,3 @@ validateFact Fact {..} = do
|
||||
checkColumnTable tables factCol = case factCol of
|
||||
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
|
||||
_ -> []
|
||||
|
||||
withFactValidation :: Fact -> Reader Env a -> Reader Env (Either [ValidationError] a)
|
||||
withFactValidation fact func = do
|
||||
errors <- validateFact fact
|
||||
if not $ null errors
|
||||
then return $ Left errors
|
||||
else fmap Right func
|
||||
|
Loading…
Reference in New Issue
Block a user