Adds coalescing to default values for fact table columns.
- Removes FKs from fact tables - Some coercions in SQL for faster processing
This commit is contained in:
parent
b994955399
commit
3978f33cd0
@ -28,7 +28,7 @@ library
|
||||
mtl >=2.1 && <2.3
|
||||
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
|
||||
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
|
||||
TupleSections, CPP
|
||||
TupleSections, CPP, NamedFieldPuns
|
||||
default-language: Haskell2010
|
||||
|
||||
executable ringo
|
||||
@ -49,7 +49,7 @@ executable ringo
|
||||
ringo
|
||||
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
|
||||
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
|
||||
TupleSections, CPP
|
||||
TupleSections, CPP, NamedFieldPuns
|
||||
default-language: Haskell2010
|
||||
|
||||
test-suite ringo-test
|
||||
|
@ -30,7 +30,7 @@ extractFactTable fact = do
|
||||
|
||||
columns = concatFor (factColumns fact) $ \col -> case col of
|
||||
DimTime cName ->
|
||||
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "integer" NotNull ]
|
||||
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
|
||||
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
|
||||
FactCount _ cName -> [ Column cName countColType NotNull ]
|
||||
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
|
||||
@ -41,17 +41,13 @@ extractFactTable fact = do
|
||||
FactCountDistinct _ cName -> [ Column cName "json" NotNull ]
|
||||
_ -> []
|
||||
|
||||
fks = for allDims $ \(fact', tab@Table {..}) ->
|
||||
fkCols = for allDims $ \(_, Table {..}) ->
|
||||
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
|
||||
colType = idColTypeToFKIdColType settingDimTableIdColumnType
|
||||
colNullable =
|
||||
if tab `elem` tables || fact /= fact' || any ((== Null) . columnNullable) tableColumns
|
||||
then Null
|
||||
else NotNull
|
||||
in ( Column colName colType colNullable , ForeignKey tableName [(colName, dimIdColName)] )
|
||||
in Column colName colType NotNull
|
||||
|
||||
ukColNames =
|
||||
(++ map (columnName . fst) fks)
|
||||
(++ map columnName fkCols)
|
||||
. forMaybe (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
|
||||
NoDimId cName -> Just cName
|
||||
@ -60,8 +56,8 @@ extractFactTable fact = do
|
||||
return Table
|
||||
{ tableName =
|
||||
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||
, tableColumns = columns ++ map fst fks
|
||||
, tableConstraints = UniqueKey ukColNames : map snd fks
|
||||
, tableColumns = columns ++ fkCols
|
||||
, tableConstraints = [ UniqueKey ukColNames ]
|
||||
}
|
||||
|
||||
extractDependencies :: Fact -> Reader Env Dependencies
|
||||
|
@ -61,7 +61,10 @@ extractDimensionTables fact = do
|
||||
})
|
||||
. Map.toList
|
||||
. Map.mapWithKey
|
||||
(\dim -> map (\col -> col { columnName = dimColumnName dim (columnName col) }) . nub)
|
||||
(\dim -> map (\col -> col { columnName = dimColumnName dim (columnName col)
|
||||
, columnNullable = NotNull
|
||||
})
|
||||
. nub)
|
||||
. Map.fromListWith (flip (++))
|
||||
. mapMaybe (\fcol -> do
|
||||
DimVal d col <- fcol
|
||||
|
@ -13,10 +13,9 @@ import Control.Applicative ((<$>))
|
||||
#endif
|
||||
|
||||
import Control.Monad.Reader (Reader, asks)
|
||||
import Data.List (nub, find, subsequences, partition, sortBy)
|
||||
import Data.Maybe (fromJust, fromMaybe, mapMaybe, catMaybes)
|
||||
import Data.List (nub, find)
|
||||
import Data.Maybe (fromJust, fromMaybe, mapMaybe)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.Ord (comparing)
|
||||
import Data.Text (Text)
|
||||
|
||||
import Ringo.Extractor.Internal
|
||||
@ -31,8 +30,8 @@ columnDefnSQL :: Column -> Text
|
||||
columnDefnSQL Column {..} =
|
||||
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
|
||||
|
||||
colNamesString :: [ColumnName] -> Text
|
||||
colNamesString = Text.intercalate ", "
|
||||
joinColumnNames :: [ColumnName] -> Text
|
||||
joinColumnNames = Text.intercalate ",\n"
|
||||
|
||||
fullColName :: TableName -> ColumnName -> ColumnName
|
||||
fullColName tName cName = tName <> "." <> cName
|
||||
@ -43,34 +42,16 @@ constraintDefnSQL Table {..} constraint =
|
||||
in case constraint of
|
||||
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
|
||||
ForeignKey oTableName cNamePairs ->
|
||||
[ alterTableSQL <> "FOREIGN KEY (" <> colNamesString (map fst cNamePairs) <> ") REFERENCES "
|
||||
<> oTableName <> " (" <> colNamesString (map snd cNamePairs) <> ")" ]
|
||||
UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tableName <> "(" <> colNamesString cNames <> ")"]
|
||||
-- let
|
||||
-- (notNullCols, nullCols) =
|
||||
-- both (map columnName)
|
||||
-- $ partition ((== NotNull) . columnNullable)
|
||||
-- $ catMaybes [ findColumn cName tableColumns | cName <- cNames ]
|
||||
-- combinations =
|
||||
-- map (\cs -> (cs, [ c | c <- nullCols, c `notElem` cs ]))
|
||||
-- . sortBy (comparing length)
|
||||
-- $ subsequences nullCols
|
||||
-- in [ "CREATE UNIQUE INDEX ON " <> tableName
|
||||
-- <> " (" <> colNamesString (notNullCols ++ nnCols) <> ")"
|
||||
-- <> if null whereClauses
|
||||
-- then ""
|
||||
-- else "\nWHERE "<> Text.intercalate "\nAND " whereClauses
|
||||
-- | (nnCols, nCols) <- combinations
|
||||
-- , not $ null (notNullCols ++ nnCols)
|
||||
-- , let whereClauses =
|
||||
-- [ c <> " IS NOT NULL" | c <- nnCols ] ++ [ c <> " IS NULL" | c <- nCols ] ]
|
||||
[ alterTableSQL <> "FOREIGN KEY (" <> joinColumnNames (map fst cNamePairs) <> ") REFERENCES "
|
||||
<> oTableName <> " (" <> joinColumnNames (map snd cNamePairs) <> ")" ]
|
||||
UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tableName <> " (" <> joinColumnNames cNames <> ")"]
|
||||
|
||||
tableDefnSQL :: Table -> [Text]
|
||||
tableDefnSQL table@Table {..} =
|
||||
tableSQL : concatMap (constraintDefnSQL table) tableConstraints
|
||||
where
|
||||
tableSQL = "CREATE TABLE " <> tableName <> " (\n"
|
||||
<> (Text.intercalate ",\n" . map columnDefnSQL $ tableColumns)
|
||||
<> (joinColumnNames . map columnDefnSQL $ tableColumns)
|
||||
<> "\n)"
|
||||
|
||||
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
|
||||
@ -96,12 +77,14 @@ dimColumnMapping dimPrefix fact dimTableName =
|
||||
[ (dimColumnName dName cName, cName)
|
||||
| DimVal dName cName <- factColumns fact , dimPrefix <> dName == dimTableName]
|
||||
|
||||
coalesceColumn :: Column -> Text
|
||||
coalesceColumn Column{..} =
|
||||
coalesceColumn :: TableName -> Column -> Text
|
||||
coalesceColumn tName Column{..} =
|
||||
if columnNullable == Null
|
||||
then "coalesce(" <> columnName <> "," <> defVal columnType <> ")"
|
||||
else columnName
|
||||
then "coalesce(" <> fqColName <> "," <> defVal columnType <> ")"
|
||||
else fqColName
|
||||
where
|
||||
fqColName = fullColName tName columnName
|
||||
|
||||
defVal colType
|
||||
| "integer" `Text.isPrefixOf` colType = "-42"
|
||||
| "timestamp" `Text.isPrefixOf` colType = "'00-00-00 00:00:00'"
|
||||
@ -117,14 +100,15 @@ dimensionTablePopulateSQL popMode fact dimTableName = do
|
||||
let factTable = fromJust $ findTable (factTableName fact) tables
|
||||
colMapping = dimColumnMapping dimPrefix fact dimTableName
|
||||
baseSelectC = "SELECT DISTINCT\n"
|
||||
<> colNamesString
|
||||
<> joinColumnNames
|
||||
(map (\(_, c) ->
|
||||
coalesceColumn . fromJust . findColumn c $ (tableColumns factTable))
|
||||
let col = fromJust . findColumn c $ tableColumns factTable
|
||||
in coalesceColumn (factTableName fact) col)
|
||||
colMapping)
|
||||
<> "\n"
|
||||
<> "FROM " <> factTableName fact
|
||||
insertC selectC = "INSERT INTO " <> dimTableName
|
||||
<> " (\n" <> colNamesString (map fst colMapping) <> "\n) "
|
||||
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) "
|
||||
<> "SELECT x.* FROM (\n" <> selectC <> ") x"
|
||||
timeCol = head [ cName | DimTime cName <- factColumns fact ]
|
||||
return $ case popMode of
|
||||
@ -156,21 +140,23 @@ factTablePopulateSQL popMode fact = do
|
||||
allDims <- extractAllDimensionTables fact
|
||||
tables <- asks envTables
|
||||
let fTableName = factTableName fact
|
||||
table = fromJust . findTable fTableName $ tables
|
||||
fTable = fromJust . findTable fTableName $ tables
|
||||
dimIdColName = settingDimTableIdColumnName
|
||||
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints table ]
|
||||
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
|
||||
|
||||
timeUnitColumnInsertSQL cName =
|
||||
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
|
||||
in ( colName
|
||||
, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
|
||||
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")"
|
||||
, "extract(epoch from " <> fullColName fTableName cName <> ")::bigint/"
|
||||
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit)
|
||||
, True
|
||||
)
|
||||
|
||||
factColMap = concatFor (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
|
||||
NoDimId cName -> [ (cName, fullColName fTableName cName, True) ]
|
||||
NoDimId cName ->
|
||||
let sCol = fromJust . findColumn cName $ tableColumns fTable
|
||||
in [ (cName, coalesceColumn fTableName sCol, True) ]
|
||||
FactCount scName cName ->
|
||||
[ (cName, "count(" <> maybe "*" (fullColName fTableName) scName <> ")", False) ]
|
||||
FactSum scName cName ->
|
||||
@ -188,24 +174,32 @@ factTablePopulateSQL popMode fact = do
|
||||
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
|
||||
_ -> []
|
||||
|
||||
dimColMap = for allDims $ \(dimFact, factTable@Table {..}) ->
|
||||
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) ->
|
||||
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
|
||||
col = fromJust . findColumn colName $ tableColumns factSourceTable
|
||||
factSourceTableName = factTableName dimFact
|
||||
insertSQL = if factTable `elem` tables
|
||||
then fullColName factSourceTableName colName
|
||||
factSourceTable = fromJust . findTable factSourceTableName $ tables
|
||||
insertSQL = if factTable `elem` tables -- existing dimension table
|
||||
then (if columnNullable col == Null then coalesceFKId else id)
|
||||
$ fullColName factSourceTableName colName
|
||||
else let
|
||||
dimLookupWhereClauses =
|
||||
[ fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2
|
||||
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName ]
|
||||
[ fullColName tableName c1 <> " = " <> coalesceColumn factSourceTableName col2
|
||||
| (c1, c2) <- dimColumnMapping settingDimPrefix dimFact tableName
|
||||
, let col2 = fromJust . findColumn c2 $ tableColumns factSourceTable ]
|
||||
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
|
||||
<> Text.intercalate "\n AND " dimLookupWhereClauses
|
||||
in (colName, insertSQL, True)
|
||||
<> Text.intercalate "\n AND " dimLookupWhereClauses
|
||||
insertSQL' = if factSourceTableName == fTableName
|
||||
then insertSQL
|
||||
else coalesceFKId insertSQL
|
||||
|
||||
in (colName, insertSQL', True)
|
||||
|
||||
colMap = [ (cName, (sql, groupByColPrefix <> cName), addAs)
|
||||
| (cName, sql, addAs) <- factColMap ++ dimColMap ]
|
||||
|
||||
joinClauses =
|
||||
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds table tName)
|
||||
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName)
|
||||
. nub
|
||||
. map (factTableName . fst)
|
||||
$ allDims
|
||||
@ -260,14 +254,14 @@ factTablePopulateSQL popMode fact = do
|
||||
in "UPDATE " <> extFactTableName
|
||||
<> "\nSET " <> cName <> " = " <> fullColName "xyz" cName
|
||||
<> "\nFROM ("
|
||||
<> "\nSELECT " <> Text.intercalate ",\n" (origGroupByCols ++ [aggSelectClause])
|
||||
<> "\nSELECT " <> joinColumnNames (origGroupByCols ++ [aggSelectClause])
|
||||
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
|
||||
<> "\nGROUP BY \n" <> Text.intercalate ",\n" origGroupByCols
|
||||
<> "\nGROUP BY \n" <> joinColumnNames origGroupByCols
|
||||
<> "\n) xyz"
|
||||
<> "\n WHERE\n"
|
||||
<> Text.intercalate "\nAND "
|
||||
[ coalesceFKId (fullColName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col)
|
||||
<> " = " <> coalesceFKId (fullColName "xyz" col)
|
||||
[ fullColName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
|
||||
<> " = " <> fullColName "xyz" col
|
||||
| col <- origGroupByCols ]
|
||||
|
||||
return $ insertIntoInsertSQL <> "\n" <> toSelectSQL insertIntoSelectSQL :
|
||||
@ -287,20 +281,23 @@ factTablePopulateSQL popMode fact = do
|
||||
$ table
|
||||
|
||||
toSelectSQL FactTablePopulateSelectSQL {..} =
|
||||
"SELECT \n" <> Text.intercalate ",\n " (map (uncurry asName) ftpsSelectCols)
|
||||
"SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols)
|
||||
<> "\nFROM " <> ftpsSelectTable
|
||||
<> (if not . null $ ftpsJoinClauses
|
||||
then "\n" <> Text.intercalate"\n" ftpsJoinClauses
|
||||
then "\n" <> Text.intercalate "\n" ftpsJoinClauses
|
||||
else "")
|
||||
<> (if not . null $ ftpsWhereClauses
|
||||
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses
|
||||
else "")
|
||||
<> "\nGROUP BY \n"
|
||||
<> Text.intercalate ",\n " ftpsGroupByCols
|
||||
<> joinColumnNames ftpsGroupByCols
|
||||
where
|
||||
asName sql alias = "(" <> sql <> ")" <> " as " <> alias
|
||||
|
||||
coalesceFKId col = "coalesce(" <> col <> ", -1)"
|
||||
coalesceFKId col =
|
||||
if "coalesce" `Text.isPrefixOf` col
|
||||
then col
|
||||
else "coalesce((" <> col <> "), -1)"
|
||||
|
||||
bucketCount :: Double -> Integer
|
||||
bucketCount errorRate =
|
||||
|
@ -105,6 +105,7 @@ data ValidationError = MissingTable !TableName
|
||||
| MissingFact !TableName
|
||||
| MissingColumn !TableName !ColumnName
|
||||
| MissingTimeColumn !TableName
|
||||
| NullableColumn !TableName !ColumnName
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Env = Env
|
||||
|
@ -10,6 +10,7 @@ import Control.Applicative ((<$>))
|
||||
|
||||
import Control.Monad.Reader (Reader, asks)
|
||||
|
||||
import Data.Maybe (isJust, fromJust)
|
||||
import Ringo.Types
|
||||
import Ringo.Utils
|
||||
|
||||
@ -44,7 +45,12 @@ validateFact Fact {..} = do
|
||||
let colVs = concatMap (checkColumn tables table) factColumns
|
||||
let timeVs = [ MissingTimeColumn factTableName
|
||||
| null [ c | DimTime c <- factColumns ] ]
|
||||
return $ tableVs ++ parentVs ++ colVs ++ timeVs
|
||||
let notNullVs = [ NullableColumn factTableName c
|
||||
| DimTime c <- factColumns
|
||||
, let col = findColumn c (tableColumns table)
|
||||
, isJust col
|
||||
, columnNullable (fromJust col) == Null ]
|
||||
return $ tableVs ++ parentVs ++ colVs ++ timeVs ++ notNullVs
|
||||
where
|
||||
checkFactParents fName = do
|
||||
facts <- asks envFacts
|
||||
|
Loading…
Reference in New Issue
Block a user