ringo/ringo/src/Ringo/Generator/Populate/Fact.hs

238 lines
10 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE GADTs #-}
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks, withReader)
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
, SelectList(..), SelectItem(..), JoinType(..) )
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
ilog2FunctionString :: Text
ilog2FunctionString = [r|CREATE OR REPLACE FUNCTION ilog2(v integer)
RETURNS integer AS
$$
DECLARE
r integer;
shift integer;
BEGIN
IF v > x'FFFF'::integer THEN r := 1 << 4; ELSE r := 0 << 4; END IF;
v := v >> r;
IF v > x'FF'::integer THEN shift := 1 << 3; ELSE shift := 0 << 3; END IF;
v := v >> shift;
r := r | shift;
IF v > x'F'::integer THEN shift := 1 << 2; ELSE shift := 0 << 2; END IF;
v := v >> shift;
r := r | shift;
IF v > x'3'::integer THEN shift := 1 << 1; ELSE shift := 0 << 3; END IF;
v := v >> shift;
r := r | shift;
r := r | (v >> 1);
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
|]
factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader EnvV [Statement]
factCountDistinctUpdateStmts popMode fact groupByColPrefix expr = case expr of
Select {selSelectList = SelectList _ origSelectItems, ..} -> do
Settings {..} <- asks envSettings
tables <- asks envTables
let fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
FactCountDistinct {factColMaybeSourceColumn = scName} ->
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
bucketSelectCols =
[ sia (binop "&" (app "hashtext" [ unqCol ])
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
groupByCols = map ppScalarExpr selGroupBy
selectList =
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
selectStmt =
makeSelect
{ selSelectList = sl $ selectList ++ bucketSelectCols
, selTref = selTref
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause =
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in Just $ update extFactTableName
[ (cName, eqi "xyz" cName) ]
[ subtrefa "xyz"
makeSelect
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
, selTref = [ subtrefa "zyx" selectStmt ]
, selGroupBy = selGroupBy
} ] $
foldBinop "and"
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
(eqi "xyz" col)
| col <- groupByCols ]
_ -> Nothing
_ -> return []
where
bucketCount :: Double -> Integer
bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power
factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateStmts popMode fact = do
allDims <- extractAllDimensionTables fact
withReader envView $ do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
coalesceFKId ex =
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
app' f cName = app f [ eqi fTableName cName ]
factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> [ timeUnitColumnInsertSQL cName ]
NoDimId -> [ dimIdColumnInsertSQL cName ]
TenantId -> [ dimIdColumnInsertSQL cName ]
FactCount {..} ->
[ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
FactSum {..} -> [ (cName, app' "sum" factColSourceColumn, False) ]
FactMax {..} -> [ (cName, app' "max" factColSourceColumn, False) ]
FactMin {..} -> [ (cName, app' "min" factColSourceColumn, False) ]
FactAverage {..} ->
[ ( cName <> settingAvgCountColumSuffix, app' "count" factColSourceColumn, False )
, ( cName <> settingAvgSumColumnSuffix , app' "sum" factColSourceColumn , False)
]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
dimFKIdColName =
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses = Just . foldBinop "and" $
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ eqi factSourceTableName dimFKIdColName
else coalesceFKId . subQueryExp $
makeSelect
{ selSelectList = sl [ si $ ei dimIdColName ]
, selTref =
[ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ]
, selWhere = dimLookupWhereClauses
}
in (dimFKIdColName, insertExpr, True)
colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
| (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
joinClauses =
map (tref &&& joinClausePreds fTable)
. filter (/= fTableName)
. nub
. map (factTableName . fst)
$ allDims
timeCol = eqi fTableName $ head [ cName | FactColumn cName DimTime <- factColumns fact ]
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
populateSelectExpr =
makeSelect
{ selSelectList = sl . map (uncurry sia . snd3) $ colMap
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
, selWhere = Just . foldBinop "and" $
binop "<" timeCol placeholder :
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
}
insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr
return $ insertIntoStmt : updateStmts
where
groupByColPrefix = "xxff_"
joinClausePreds table oTableName =
foldBinop "and"
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
, tName == oTableName ]
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
stmts <- factTablePopulateStmts popMode fact
return $ case stmts of
[] -> []
[i] -> [ ppStatement i ]
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us