|
|
|
@ -14,7 +14,7 @@ import Control.Applicative ((<$>)) |
|
|
|
|
|
|
|
|
|
import Control.Monad.Reader (Reader, asks) |
|
|
|
|
import Data.List (nub, find, subsequences, partition, sortBy) |
|
|
|
|
import Data.Maybe (fromJust, mapMaybe, catMaybes) |
|
|
|
|
import Data.Maybe (fromJust, fromMaybe, mapMaybe, catMaybes) |
|
|
|
|
import Data.Monoid ((<>)) |
|
|
|
|
import Data.Ord (comparing) |
|
|
|
|
import Data.Text (Text) |
|
|
|
@ -120,14 +120,23 @@ dimensionTablePopulateSQL popMode fact dimTableName = do |
|
|
|
|
<> "\nWHERE " <> Text.intercalate " \nAND " |
|
|
|
|
[ fullColName dimTableName c <> " IS NULL" | (c, _) <- colMapping ] |
|
|
|
|
|
|
|
|
|
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env Text |
|
|
|
|
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL |
|
|
|
|
{ ftpsSelectCols :: ![(Text, Text)] |
|
|
|
|
, ftpsSelectTable :: !Text |
|
|
|
|
, ftpsJoinClauses :: ![Text] |
|
|
|
|
, ftpsWhereClauses :: ![Text] |
|
|
|
|
, ftpsGroupByCols :: ![Text] |
|
|
|
|
} deriving (Show, Eq) |
|
|
|
|
|
|
|
|
|
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text] |
|
|
|
|
factTablePopulateSQL popMode fact = do |
|
|
|
|
Settings {..} <- asks envSettings |
|
|
|
|
allDims <- extractAllDimensionTables fact |
|
|
|
|
tables <- asks envTables |
|
|
|
|
let fTableName = factTableName fact |
|
|
|
|
table = fromJust . findTable fTableName $ tables |
|
|
|
|
dimIdColName = settingDimTableIdColumnName |
|
|
|
|
Settings {..} <- asks envSettings |
|
|
|
|
allDims <- extractAllDimensionTables fact |
|
|
|
|
tables <- asks envTables |
|
|
|
|
let fTableName = factTableName fact |
|
|
|
|
table = fromJust . findTable fTableName $ tables |
|
|
|
|
dimIdColName = settingDimTableIdColumnName |
|
|
|
|
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints table ] |
|
|
|
|
|
|
|
|
|
timeUnitColumnInsertSQL cName = |
|
|
|
|
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit |
|
|
|
@ -138,13 +147,13 @@ factTablePopulateSQL popMode fact = do |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
factColMap = concatFor (factColumns fact) $ \col -> case col of |
|
|
|
|
DimTime cName -> [ timeUnitColumnInsertSQL cName ] |
|
|
|
|
NoDimId cName -> [ (cName, fullColName fTableName cName, True) ] |
|
|
|
|
FactCount scName cName -> |
|
|
|
|
DimTime cName -> [ timeUnitColumnInsertSQL cName ] |
|
|
|
|
NoDimId cName -> [ (cName, fullColName fTableName cName, True) ] |
|
|
|
|
FactCount scName cName -> |
|
|
|
|
[ (cName, "count(" <> maybe "*" (fullColName fTableName) scName <> ")", False) ] |
|
|
|
|
FactSum scName cName -> |
|
|
|
|
FactSum scName cName -> |
|
|
|
|
[ (cName, "sum(" <> fullColName fTableName scName <> ")", False) ] |
|
|
|
|
FactAverage scName cName -> |
|
|
|
|
FactAverage scName cName -> |
|
|
|
|
[ ( cName <> settingAvgCountColumSuffix |
|
|
|
|
, "count(" <> fullColName fTableName scName <> ")" |
|
|
|
|
, False |
|
|
|
@ -154,6 +163,7 @@ factTablePopulateSQL popMode fact = do |
|
|
|
|
, False |
|
|
|
|
) |
|
|
|
|
] |
|
|
|
|
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)] |
|
|
|
|
_ -> [] |
|
|
|
|
|
|
|
|
|
dimColMap = for allDims $ \(dimFact, factTable@Table {..}) -> |
|
|
|
@ -169,31 +179,79 @@ factTablePopulateSQL popMode fact = do |
|
|
|
|
<> Text.intercalate "\n AND " dimLookupWhereClauses |
|
|
|
|
in (colName, insertSQL, True) |
|
|
|
|
|
|
|
|
|
colMap = [ (cName, if addAs then asName cName sql else sql, addAs) |
|
|
|
|
colMap = [ (cName, (sql, groupByColPrefix <> cName), addAs) |
|
|
|
|
| (cName, sql, addAs) <- factColMap ++ dimColMap ] |
|
|
|
|
|
|
|
|
|
joinClauses = |
|
|
|
|
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> " ON "<> p) <$> joinClausePreds table tName) |
|
|
|
|
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds table tName) |
|
|
|
|
. nub |
|
|
|
|
. map (factTableName . fst) |
|
|
|
|
$ allDims |
|
|
|
|
|
|
|
|
|
timeCol = fullColName fTableName $ head [ cName | DimTime cName <- factColumns fact ] |
|
|
|
|
|
|
|
|
|
return $ "INSERT INTO " |
|
|
|
|
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit |
|
|
|
|
<> " (\n" <> unlineCols (map fst3 colMap) <> "\n)" |
|
|
|
|
<> "\nSELECT \n" <> unlineCols (map snd3 colMap) |
|
|
|
|
<> "\nFROM " <> fTableName <> "\n" <> Text.intercalate"\n" joinClauses |
|
|
|
|
<> (if popMode == IncrementalPopulation |
|
|
|
|
then "\nWHERE " <> timeCol <> " > ? AND " <> timeCol <> " <= ?" |
|
|
|
|
else "") |
|
|
|
|
<> "\nGROUP BY \n" |
|
|
|
|
<> unlineCols (map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap) |
|
|
|
|
extFactTableName = |
|
|
|
|
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit |
|
|
|
|
|
|
|
|
|
insertIntoSelectSQL = |
|
|
|
|
FactTablePopulateSelectSQL |
|
|
|
|
{ ftpsSelectCols = map snd3 colMap |
|
|
|
|
, ftpsSelectTable = fTableName |
|
|
|
|
, ftpsJoinClauses = joinClauses |
|
|
|
|
, ftpsWhereClauses = if popMode == IncrementalPopulation |
|
|
|
|
then [timeCol <> " > ?", timeCol <> " <= ?"] |
|
|
|
|
else [] |
|
|
|
|
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
insertIntoInsertSQL = "INSERT INTO " <> extFactTableName |
|
|
|
|
<> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)" |
|
|
|
|
|
|
|
|
|
countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact] |
|
|
|
|
|
|
|
|
|
updateSQLs = |
|
|
|
|
let origGroupByCols = ftpsGroupByCols insertIntoSelectSQL |
|
|
|
|
origSelectCols = ftpsSelectCols insertIntoSelectSQL |
|
|
|
|
|
|
|
|
|
in for countDistinctCols $ \(FactCountDistinct scName cName) -> |
|
|
|
|
let unqCol = fullColName fTableName (fromMaybe tablePKColName scName) <> "::text" |
|
|
|
|
|
|
|
|
|
bucketSelectCols = |
|
|
|
|
[ ( "hashtext(" <> unqCol <> ") & " |
|
|
|
|
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1) |
|
|
|
|
, cName <> "_bnum") |
|
|
|
|
, ( "31 - floor(log(2, min(hashtext(" <> unqCol <> ") & ~(1 << 31))))::int" |
|
|
|
|
, cName <> "_bhash" |
|
|
|
|
) |
|
|
|
|
] |
|
|
|
|
|
|
|
|
|
selectSQL = toSelectSQL $ |
|
|
|
|
insertIntoSelectSQL |
|
|
|
|
{ ftpsSelectCols = filter ((`elem` origGroupByCols) . snd) origSelectCols ++ bucketSelectCols |
|
|
|
|
, ftpsGroupByCols = origGroupByCols ++ [cName <> "_bnum"] |
|
|
|
|
, ftpsWhereClauses = ftpsWhereClauses insertIntoSelectSQL ++ [ unqCol <> " IS NOT NULL" ] |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
aggSelectClause = |
|
|
|
|
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName |
|
|
|
|
|
|
|
|
|
in "UPDATE " <> extFactTableName |
|
|
|
|
<> "\nSET " <> cName <> " = " <> fullColName "xyz" cName |
|
|
|
|
<> "\nFROM (" |
|
|
|
|
<> "\nSELECT " <> Text.intercalate ",\n" (origGroupByCols ++ [aggSelectClause]) |
|
|
|
|
<> "\nFROM (\n" <> selectSQL <> "\n) zyx" |
|
|
|
|
<> "\nGROUP BY \n" <> Text.intercalate ",\n" origGroupByCols |
|
|
|
|
<> "\n) xyz" |
|
|
|
|
<> "\n WHERE\n" |
|
|
|
|
<> Text.intercalate "\nAND " |
|
|
|
|
[ coalesceFKId (fullColName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col) |
|
|
|
|
<> " = " <> coalesceFKId (fullColName "xyz" col) |
|
|
|
|
| col <- origGroupByCols ] |
|
|
|
|
|
|
|
|
|
return $ insertIntoInsertSQL <> "\n" <> toSelectSQL insertIntoSelectSQL : |
|
|
|
|
if null countDistinctCols then [] else updateSQLs |
|
|
|
|
where |
|
|
|
|
groupByColPrefix = "xxff_" |
|
|
|
|
asName cName sql = "(" <> sql <> ")" <> " as " <> groupByColPrefix <> cName |
|
|
|
|
unlineCols = Text.intercalate ",\n " |
|
|
|
|
groupByColPrefix = "xxff_" |
|
|
|
|
|
|
|
|
|
joinClausePreds table oTableName = |
|
|
|
|
fmap (\(ForeignKey _ colPairs) -> |
|
|
|
@ -205,3 +263,25 @@ factTablePopulateSQL popMode fact = do |
|
|
|
|
_ -> False) |
|
|
|
|
. tableConstraints |
|
|
|
|
$ table |
|
|
|
|
|
|
|
|
|
toSelectSQL FactTablePopulateSelectSQL {..} = |
|
|
|
|
"SELECT \n" <> Text.intercalate ",\n " (map (uncurry asName) ftpsSelectCols) |
|
|
|
|
<> "\nFROM " <> ftpsSelectTable |
|
|
|
|
<> (if not . null $ ftpsJoinClauses |
|
|
|
|
then "\n" <> Text.intercalate"\n" ftpsJoinClauses |
|
|
|
|
else "") |
|
|
|
|
<> (if not . null $ ftpsWhereClauses |
|
|
|
|
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses |
|
|
|
|
else "") |
|
|
|
|
<> "\nGROUP BY \n" |
|
|
|
|
<> Text.intercalate ",\n " ftpsGroupByCols |
|
|
|
|
where |
|
|
|
|
asName sql alias = "(" <> sql <> ")" <> " as " <> alias |
|
|
|
|
|
|
|
|
|
coalesceFKId col = "coalesce(" <> col <> ", -1)" |
|
|
|
|
|
|
|
|
|
bucketCount :: Double -> Integer |
|
|
|
|
bucketCount errorRate = |
|
|
|
|
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer) |
|
|
|
|
in ceiling $ 2 ** power |
|
|
|
|
|
|
|
|
|