Adds incremental refresh sql generation for dimension tables.

- Adds partial unique indexes for tables to handle null columns.
- Adds validation for time column in fact tables.
pull/1/head
Abhinav Sarkar 2015-12-19 11:55:08 +05:30
parent e46d3684cd
commit 7dc6db944f
6 changed files with 99 additions and 73 deletions

View File

@ -14,7 +14,7 @@ import Ringo
import Ringo.ArgParser
import Ringo.InputParser
data SQLType = Create | Populate | Update deriving (Eq, Show)
data SQLType = Create | FullRefresh | IncRefresh deriving (Eq, Show)
main :: IO ()
main = do
@ -36,30 +36,29 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
createDirectoryIfMissing True dirName
writeFile fileName sql
where
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
| (_, tabs) <- dimTables
, table <- tabs
| (_, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
factTableDefnSQLs = [ (Create
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
factTableDefnSQLs = [ (Create , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
| (fact, table) <- factTables ]
dimTableInsertSQLs = [ (Populate
, tableName table
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
| (fact, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
dimTablePopulateSQLs typ gen = [ (typ , tableName table, sqlStr $ gen env fact (tableName table))
| (fact, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
factTableInsertSQLs = [ (FullRefresh, tableName table, sqlStr $ factTableInsertSQL env fact)
| (fact, table) <- factTables ]
sqls = concat [ dimTableDefnSQLs
, factTableDefnSQLs
, dimTableInsertSQLs
, dimTablePopulateSQLs FullRefresh $ dimensionTablePopulateSQL FullPopulation
, dimTablePopulateSQLs IncRefresh $ dimensionTablePopulateSQL IncrementalPopulation
, factTableInsertSQLs
]

View File

@ -4,7 +4,7 @@ module Ringo
, extractDimensionTables
, G.tableDefnSQL
, factTableDefnSQL
, dimensionTableInsertSQL
, dimensionTablePopulateSQL
, factTableInsertSQL
, validateTable
, validateFact
@ -27,8 +27,9 @@ extractDimensionTables env = flip runReader env . E.extractDimensionTables
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
dimensionTableInsertSQL :: Env -> Fact -> TableName -> Text
dimensionTableInsertSQL env fact = flip runReader env . G.dimensionTableInsertSQL fact
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact
factTableInsertSQL :: Env -> Fact -> Text
factTableInsertSQL env = flip runReader env . G.factTableInsertSQL

View File

@ -10,7 +10,7 @@ import Control.Applicative ((<$>))
import Control.Monad.Reader (Reader, asks)
import Data.Function (on)
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Maybe (mapMaybe, fromMaybe, fromJust, catMaybes)
import Data.Monoid ((<>))
import Data.List (nub, nubBy)
import Data.Text (Text)
@ -48,10 +48,7 @@ extractDimensionTables fact = do
let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where
dimsFromIds tables =
forMaybe (factColumns fact) $ \fcol -> case fcol of
DimId d _ -> findTable d tables
_ -> Nothing
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ]
dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) ->

View File

@ -1,7 +1,7 @@
module Ringo.Generator
( tableDefnSQL
, factTableDefnSQL
, dimensionTableInsertSQL
, dimensionTablePopulateSQL
, factTableInsertSQL
) where
@ -13,9 +13,10 @@ import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.List (intersperse, nub, find)
import Data.Maybe (fromJust, mapMaybe)
import Data.List (nub, find, subsequences, partition, sortBy)
import Data.Maybe (fromJust, mapMaybe, catMaybes)
import Data.Monoid ((<>))
import Data.Ord (comparing)
import Data.Text (Text)
import Ringo.Extractor.Internal
@ -31,24 +32,44 @@ columnDefnSQL Column {..} =
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
colNamesString :: [ColumnName] -> Text
colNamesString cNames = Text.concat (intersperse ", " cNames)
colNamesString = Text.intercalate ", "
constraintDefnSQL :: TableName -> TableConstraint -> Text
constraintDefnSQL tableName constraint =
"ALTER TABLE ONLY " <> tableName <> " ADD "
<> case constraint of
PrimaryKey cName -> "PRIMARY KEY (" <> cName <> ")"
UniqueKey cNames -> "UNIQUE (" <> colNamesString cNames <> ")"
ForeignKey oTableName cNamePairs ->
"FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")"
fullColName :: TableName -> ColumnName -> ColumnName
fullColName tName cName = tName <> "." <> cName
constraintDefnSQL :: Table -> TableConstraint -> [Text]
constraintDefnSQL Table {..} constraint =
let alterTableSQL = "ALTER TABLE ONLY " <> tableName <> " ADD "
in case constraint of
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
ForeignKey oTableName cNamePairs ->
[ alterTableSQL <> "FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")" ]
UniqueKey cNames -> let
(notNullCols, nullCols) =
both (map columnName)
$ partition ((== NotNull) . columnNullable)
$ catMaybes [ findColumn cName tableColumns | cName <- cNames ]
combinations =
map (\cs -> (cs, [ c | c <- nullCols, c `notElem` cs ]))
. sortBy (comparing length)
$ subsequences nullCols
in [ "CREATE UNIQUE INDEX ON " <> tableName
<> " (" <> colNamesString (notNullCols ++ nnCols) <> ")"
<> if null whereClauses
then ""
else "\nWHERE "<> Text.intercalate "\nAND " whereClauses
| (nnCols, nCols) <- combinations
, not $ null (notNullCols ++ nnCols)
, let whereClauses =
[ c <> " IS NOT NULL" | c <- nnCols ] ++ [ c <> " IS NULL" | c <- nCols ] ]
tableDefnSQL :: Table -> [Text]
tableDefnSQL Table {..} =
tableSQL : map (constraintDefnSQL tableName) tableConstraints
tableDefnSQL table@Table {..} =
tableSQL : concatMap (constraintDefnSQL table) tableConstraints
where
tableSQL = "CREATE TABLE " <> tableName <> " (\n"
<> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns)
<> (Text.intercalate ",\n" . map columnDefnSQL $ tableColumns)
<> "\n)"
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
@ -71,21 +92,33 @@ factTableDefnSQL fact table = do
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
forMaybe (factColumns fact) $ \fCol -> case fCol of
DimVal dName cName | dimPrefix <> dName == dimTableName ->
Just (dimColumnName dName cName, cName)
_ -> Nothing
[ (dimColumnName dName cName, cName)
| DimVal dName cName <- factColumns fact , dimPrefix <> dName == dimTableName]
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
dimensionTableInsertSQL fact dimTableName = do
dimPrefix <- settingDimPrefix <$> asks envSettings
let colMapping = dimColumnMapping dimPrefix fact dimTableName
return $ "INSERT INTO " <> dimTableName <> " (\n"
<> colNamesString (map fst colMapping)
<> "\n) SELECT DISTINCT \n"
<> colNamesString (map snd colMapping)
<> "\nFROM " <> factTableName fact
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do
dimPrefix <- settingDimPrefix <$> asks envSettings
let colMapping = dimColumnMapping dimPrefix fact dimTableName
baseSelectC = "SELECT DISTINCT\n" <> colNamesString (map snd colMapping) <> "\n"
<> "FROM " <> factTableName fact
insertC selectC = "INSERT INTO " <> dimTableName
<> " (\n" <> colNamesString (map fst colMapping) <> "\n) "
<> "SELECT x.* FROM (\n" <> selectC <> ") x"
timeCol = head [ cName | DimTime cName <- factColumns fact ]
return $ case popMode of
FullPopulation -> insertC baseSelectC
IncrementalPopulation ->
insertC (baseSelectC <> "\nWHERE "
<> timeCol <> " > ? AND " <> timeCol <> " <= ?"
<> " AND (\n"
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
<> "\n)")
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColName dimTableName c1 <> " IS NOT DISTINCT FROM " <> fullColName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
factTableInsertSQL :: Fact -> Reader Env Text
factTableInsertSQL fact = do
@ -131,7 +164,7 @@ factTableInsertSQL fact = do
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
<> (Text.intercalate "\n AND " $ dimLookupWhereClauses)
in (colName, insertSQL, True)
colMap = [ (cName, if addAs then asName cName sql else sql, addAs)
@ -147,18 +180,17 @@ factTableInsertSQL fact = do
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
<> " (\n" <> unlineCols (map fst3 colMap) <> "\n)"
<> "\nSELECT \n" <> unlineCols (map snd3 colMap)
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
<> "\nFROM " <> fTableName <> "\n" <> Text.intercalate"\n" joinClauses
<> "\nGROUP BY \n"
<> unlineCols (map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap)
where
groupByColPrefix = "xxff_"
fullColName tName cName = tName <> "." <> cName
asName cName sql = "(" <> sql <> ")" <> " as " <> groupByColPrefix <> cName
unlineCols = Text.concat . intersperse ",\n "
unlineCols = Text.intercalate ",\n "
joinClausePreds table oTableName =
fmap (\(ForeignKey _ colPairs) ->
Text.concat . intersperse " AND "
Text.intercalate " AND "
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
$ colPairs )
. find (\cons -> case cons of

View File

@ -93,9 +93,10 @@ defSettings = Settings
, settingFactInfix = "_by_"
}
data ValidationError = MissingTable !TableName
| MissingFact !TableName
| MissingColumn !TableName !ColumnName
data ValidationError = MissingTable !TableName
| MissingFact !TableName
| MissingColumn !TableName !ColumnName
| MissingTimeColumn !TableName
deriving (Eq, Show)
data Env = Env
@ -103,3 +104,5 @@ data Env = Env
, envFacts :: ![Fact]
, envSettings :: !Settings
} deriving (Eq, Show)
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)

View File

@ -1,7 +1,6 @@
module Ringo.Validator
( validateTable
, validateFact
, withFactValidation
) where
#if MIN_VERSION_base(4,8,0)
@ -40,10 +39,12 @@ validateFact Fact {..} = do
case findTable factTableName tables of
Nothing -> return [ MissingTable factTableName ]
Just table -> do
tableVs <- validateTable table
parentVs <- concat <$> mapM checkFactParents factParentNames
let colVs = concatMap (checkColumn tables table) factColumns
return $ tableVs ++ parentVs ++ colVs
tableVs <- validateTable table
parentVs <- concat <$> mapM checkFactParents factParentNames
let colVs = concatMap (checkColumn tables table) factColumns
let timeVs = [ MissingTimeColumn factTableName
| null [ c | DimTime c <- factColumns ] ]
return $ tableVs ++ parentVs ++ colVs ++ timeVs
where
checkFactParents fName = do
facts <- asks envFacts
@ -58,10 +59,3 @@ validateFact Fact {..} = do
checkColumnTable tables factCol = case factCol of
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
_ -> []
withFactValidation :: Fact -> Reader Env a -> Reader Env (Either [ValidationError] a)
withFactValidation fact func = do
errors <- validateFact fact
if not $ null errors
then return $ Left errors
else fmap Right func