2015-12-29 18:22:01 +05:30
|
|
|
{-# LANGUAGE QuasiQuotes #-}
|
2015-12-29 17:11:15 +05:30
|
|
|
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
|
|
|
|
|
|
|
|
import qualified Data.Text as Text
|
|
|
|
|
|
|
|
#if MIN_VERSION_base(4,8,0)
|
|
|
|
#else
|
|
|
|
import Control.Applicative ((<$>))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
import Control.Monad.Reader (Reader, asks)
|
|
|
|
import Data.List (nub)
|
|
|
|
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
|
|
|
|
import Data.Monoid ((<>))
|
|
|
|
import Data.Text (Text)
|
2015-12-29 18:22:01 +05:30
|
|
|
import Text.RawString.QQ (r)
|
2015-12-29 17:11:15 +05:30
|
|
|
|
|
|
|
import Ringo.Extractor.Internal
|
|
|
|
import Ringo.Generator.Internal
|
|
|
|
import Ringo.Types
|
|
|
|
import Ringo.Utils
|
|
|
|
|
2015-12-29 18:22:01 +05:30
|
|
|
-- | SQL source that (re)creates the PL/pgSQL function @ilog2(integer)@.
--
-- The function returns the integer base-2 logarithm of its argument, i.e. the
-- index of the highest set bit, and returns 0 for an argument of 0. It narrows
-- the position down by testing whether the value exceeds @0xFFFF@, @0xFF@,
-- @0xF@ and @0x3@ in turn, shifting the value right and accumulating the
-- shift amounts in @r@.
--
-- The statement is emitted ahead of the count-distinct @UPDATE@ statements,
-- which call @ilog2@.
ilog2FunctionString :: Text
ilog2FunctionString = [r|CREATE OR REPLACE FUNCTION ilog2(v integer)
RETURNS integer AS
$$
DECLARE
r integer;
shift integer;
BEGIN
IF v > x'FFFF'::integer THEN r := 1 << 4; ELSE r := 0 << 4; END IF;
v := v >> r;
IF v > x'FF'::integer THEN shift := 1 << 3; ELSE shift := 0 << 3; END IF;
v := v >> shift;
r := r | shift;
IF v > x'F'::integer THEN shift := 1 << 2; ELSE shift := 0 << 2; END IF;
v := v >> shift;
r := r | shift;
IF v > x'3'::integer THEN shift := 1 << 1; ELSE shift := 0 << 1; END IF;
v := v >> shift;
r := r | shift;
r := r | (v >> 1);
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE|]
|
|
|
|
|
2015-12-29 17:11:15 +05:30
|
|
|
-- | The parts of the @SELECT@ statement used to populate a fact table, kept
-- separate so that they can be adjusted before being rendered by
-- 'toSelectSQL'.
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
  { -- | Selected columns as (SQL expression, alias) pairs.
    ftpsSelectCols   :: ![(Text, Text)]
    -- | Table named in the @FROM@ clause.
  , ftpsSelectTable  :: !Text
    -- | Complete join clauses, rendered one per line after @FROM@.
  , ftpsJoinClauses  :: ![Text]
    -- | Predicates of the @WHERE@ clause, combined with @AND@.
  , ftpsWhereClauses :: ![Text]
    -- | Entries of the @GROUP BY@ clause.
  , ftpsGroupByCols  :: ![Text]
  }
  deriving (Eq, Show)
|
2015-12-29 17:11:15 +05:30
|
|
|
|
2015-12-30 19:57:38 +05:30
|
|
|
-- | Builds the statements that fill in the count-distinct columns of the
-- extracted fact table.
--
-- One @UPDATE@ is generated per 'FactCountDistinct' column of the fact. Each
-- statement re-runs the population select restricted to its group-by columns,
-- adds a bucket number and a per-bucket hash summary of the counted column,
-- aggregates those into a JSON object and joins the result back onto the
-- fact table through the group-by columns.
--
-- @groupByColPrefix@ is the prefix carried by the group-by aliases of
-- @populateSelectSQL@; it is stripped to obtain the fact table column names.
--
-- When at least one @UPDATE@ is generated, 'ilog2FunctionString' is put in
-- front of them because the statements call @ilog2@. A fact without
-- count-distinct columns yields an empty list.
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text]
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do
  Settings {..} <- asks envSettings
  tables        <- asks envTables
  let fTableName = factTableName fact
      fTable     =
        fromMaybe (failWith $ "table not found: " ++ Text.unpack fTableName)
                  (findTable fTableName tables)

      -- Evaluated lazily: only needed for count-distinct columns that do not
      -- name a source column.
      tablePKColName =
        case [ cName | PrimaryKey cName <- tableConstraints fTable ] of
          pkCol : _ -> pkCol
          []        -> failWith $ "no primary key on table: " ++ Text.unpack fTableName

      extFactTableName =
        suffixTableName popMode settingTableNameSuffixTemplate
          $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit

      -- The bucket count is a power of two, so "count - 1" works as a bit
      -- mask. It is the same for every column, hence computed once.
      bucketMask = Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1

      countDistinctUpdateSQL scName cName =
        let unqCol    = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
            bucketCol = cName <> "_bnum"
            hashCol   = cName <> "_bhash"

            bucketSelectCols =
              [ ( "hashtext(" <> unqCol <> ") & " <> bucketMask
                , bucketCol
                )
              , ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
                , hashCol
                )
              ]

            -- Keep only the group-by columns of the original select and add
            -- the bucket columns; rows with a NULL counted value are skipped.
            selectSQL = toSelectSQL $
              populateSelectSQL
                { ftpsSelectCols   = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
                , ftpsGroupByCols  = ftpsGroupByCols ++ [ bucketCol ]
                , ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
                }

            aggSelectClause =
              "json_object_agg(" <> bucketCol <> ", " <> hashCol <> ") AS " <> cName

            -- Parenthesised explicitly: the prefix is stripped from the column
            -- name only, not from the whole comparison. A column without the
            -- prefix is used unchanged.
            joinConditions =
              [ fullColumnName extFactTableName (fromMaybe col $ Text.stripPrefix groupByColPrefix col)
                  <> " = " <> fullColumnName "xyz" col
              | col <- ftpsGroupByCols ]

        in "UPDATE " <> extFactTableName
             <> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
             <> "\nFROM ("
             <> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
             <> "\nFROM (\n" <> selectSQL <> "\n) zyx"
             <> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
             <> "\n) xyz"
             <> "\n WHERE\n"
             <> Text.intercalate "\nAND " joinConditions

      updateSQLs =
        [ countDistinctUpdateSQL scName cName
        | FactCountDistinct scName cName <- factColumns fact ]

  return $ if null updateSQLs then [] else ilog2FunctionString : updateSQLs
  where
    -- Aborts with a message that names this function.
    failWith :: String -> a
    failWith msg = error $ "factTableUpdateSQL: " ++ msg

    -- Number of buckets for the given error rate: two raised to the ceiling
    -- of log2 ((1.04 / errorRate) ^ 2).
    bucketCount :: Double -> Integer
    bucketCount errorRate =
      let power = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer) :: Double
      in ceiling $ 2 ** power
|
|
|
|
|
|
|
|
-- | Builds the SQL statements that populate the extracted fact table of the
-- given fact.
--
-- The first statement is an @INSERT INTO ... SELECT@ that computes the time
-- unit column, the plain id columns, the dimension foreign keys and the
-- aggregates from the fact's source table. It is followed by the statements
-- of 'factTableUpdateSQL' for the count-distinct columns, which are inserted
-- as empty JSON objects here.
--
-- The select is bounded by a @<= ?@ placeholder on the time column; in
-- 'IncrementalPopulation' mode a @> ?@ placeholder is added as well.
--
-- Evaluating the generated SQL fails with a descriptive error when the fact's
-- table, a referenced column or the fact's time dimension column cannot be
-- found.
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
  Settings {..} <- asks envSettings
  allDims       <- extractAllDimensionTables fact
  tables        <- asks envTables
  defaults      <- asks envTypeDefaults
  let fTableName   = factTableName fact
      fTable       = lookupTable fTableName tables
      dimIdColName = settingDimTableIdColumnName

      -- Wraps an expression in coalesce(...) with the configured fallback id,
      -- unless the expression already starts with "coalesce".
      coalesceFKId col
        | "coalesce" `Text.isPrefixOf` col = col
        | otherwise =
            "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"

      -- Each entry below is (target column, SQL expression, add to GROUP BY?).
      timeUnitColumnInsertSQL cName =
        ( timeUnitColumnName dimIdColName cName settingTimeUnit
        , "extract(epoch from " <> fullColumnName fTableName cName <> ")::bigint/"
            <> Text.pack (show $ timeUnitToSeconds settingTimeUnit)
        , True
        )

      dimIdColumnInsertSQL cName =
        (cName, coalesceColumn defaults fTableName (lookupColumn fTable cName), True)

      factColumnSQL col = case col of
        DimTime cName  -> [ timeUnitColumnInsertSQL cName ]
        NoDimId cName  -> [ dimIdColumnInsertSQL cName ]
        TenantId cName -> [ dimIdColumnInsertSQL cName ]
        FactCount scName cName ->
          [ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
        FactSum scName cName ->
          [ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
        FactAverage scName cName ->
          [ ( cName <> settingAvgCountColumSuffix
            , "count(" <> fullColumnName fTableName scName <> ")"
            , False
            )
          , ( cName <> settingAvgSumColumnSuffix
            , "sum(" <> fullColumnName fTableName scName <> ")"
            , False
            )
          ]
        -- Placeholder value; filled in by the statements of factTableUpdateSQL.
        FactCountDistinct _ cName -> [ (cName, "'{}'::json", False) ]
        _ -> []

      factColMap = concatMap factColumnSQL (factColumns fact)

      dimColumnSQL (dimFact, dimTable@Table {tableName}) =
        let dimFKIdColName      = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
            factSourceTableName = factTableName dimFact
            factSourceTable     = lookupTable factSourceTableName tables
            dimFKIdColumn       = lookupColumn factSourceTable dimFKIdColName

            dimLookupWhereClauses =
              [ fullColumnName tableName dimColName <> " = "
                  <> coalesceColumn defaults factSourceTableName sourceCol
              | (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
              , let sourceCol = lookupColumn factSourceTable sourceColName ]

            -- NOTE(review): the result is passed through coalesceFKId again
            -- below, so the nullability check here appears not to change the
            -- generated SQL; kept as is to preserve behaviour.
            insertSQL
              | dimTable `elem` tables = -- existing dimension table
                  (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
                    $ fullColumnName factSourceTableName dimFKIdColName
              | otherwise =
                  "SELECT " <> dimIdColName <> " FROM "
                    <> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName
                    <> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses
        in (dimFKIdColName, coalesceFKId insertSQL, True)

      dimColMap = map dimColumnSQL allDims

      -- (target column, (SQL expression, prefixed select alias), add to GROUP BY?)
      colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy)
               | (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ]

      -- One LEFT JOIN per distinct dimension source table that the fact table
      -- has a foreign key to.
      joinClauses =
        mapMaybe (\tName -> (\preds -> "LEFT JOIN " <> tName <> "\nON " <> preds) <$> joinClausePreds fTable tName)
          . nub
          . map (factTableName . fst)
          $ allDims

      timeCol = case [ cName | DimTime cName <- factColumns fact ] of
        cName : _ -> fullColumnName fTableName cName
        []        -> orFail ("no time dimension column in fact on table: " ++ Text.unpack fTableName) Nothing

      extFactTableName =
        suffixTableName popMode settingTableNameSuffixTemplate
          $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit

      populateSelectSQL =
        FactTablePopulateSelectSQL
          { ftpsSelectCols   = [ selectCol | (_, selectCol, _) <- colMap ]
          , ftpsSelectTable  = fTableName
          , ftpsJoinClauses  = joinClauses
          , ftpsWhereClauses =
              timeCol <> " <= ?" : [ timeCol <> " > ?" | popMode == IncrementalPopulation ]
          , ftpsGroupByCols  = [ groupByColPrefix <> cName | (cName, _, True) <- colMap ]
          }

      insertIntoSQL =
        "INSERT INTO " <> extFactTableName
          <> " (\n" <> Text.intercalate ",\n " [ cName | (cName, _, _) <- colMap ] <> "\n)\n"
          <> toSelectSQL populateSelectSQL

  updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL

  return $ insertIntoSQL : updateSQLs
  where
    -- Prefix of the select aliases, keeping them apart from source column names.
    groupByColPrefix :: Text
    groupByColPrefix = "xxff_"

    -- Unwraps a lookup result, failing with a message that names this function.
    orFail :: String -> Maybe a -> a
    orFail msg = fromMaybe (error $ "factTablePopulateSQL: " ++ msg)

    lookupTable tName =
      orFail ("table not found: " ++ Text.unpack tName) . findTable tName

    lookupColumn table cName =
      orFail ("column not found: " ++ Text.unpack (tableName table) ++ "." ++ Text.unpack cName)
        . findColumn cName
        $ tableColumns table

    -- Join predicate built from the first foreign key of @table@ that refers
    -- to @oTableName@; Nothing when there is no such foreign key.
    joinClausePreds table oTableName =
      fmap (Text.intercalate " AND " . map columnEq) . listToMaybe $
        [ colPairs | ForeignKey tName colPairs <- tableConstraints table
                   , tName == oTableName ]
      where
        columnEq (c1, c2) =
          fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2
|
|
|
|
|
|
|
|
-- | Renders the parts of a population select as a single SQL @SELECT@
-- statement.
--
-- Every select column is rendered as @(expression) as alias@. The join,
-- @WHERE@ and @GROUP BY@ parts are each left out when their list is empty, so
-- that no dangling keyword ends up in the statement.
toSelectSQL :: FactTablePopulateSelectSQL -> Text
toSelectSQL FactTablePopulateSelectSQL {..} =
  "SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols)
    <> "\nFROM " <> ftpsSelectTable
    <> optional "\n" (Text.intercalate "\n") ftpsJoinClauses
    <> optional "\nWHERE " (Text.intercalate "\nAND ") ftpsWhereClauses
    <> optional "\nGROUP BY \n" joinColumnNames ftpsGroupByCols
  where
    -- Renders a clause as its introduction followed by its rendered items,
    -- or as nothing at all when there are no items.
    optional :: Text -> ([Text] -> Text) -> [Text] -> Text
    optional intro render items
      | null items = ""
      | otherwise  = intro <> render items

    asName sql alias = "(" <> sql <> ")" <> " as " <> alias
|
|
|
|
|