Refactors generator populate fact module to simplify the code.
- Extracts smaller functions out of big ones.
- Moves count-distinct update related code to a separate module.
master
parent
d27145b553
commit
18dde15427
|
@ -26,6 +26,7 @@ library
|
||||||
Ringo.Generator.Create,
|
Ringo.Generator.Create,
|
||||||
Ringo.Generator.Populate.Dimension,
|
Ringo.Generator.Populate.Dimension,
|
||||||
Ringo.Generator.Populate.Fact,
|
Ringo.Generator.Populate.Fact,
|
||||||
|
Ringo.Generator.Populate.Fact.CountDistinct,
|
||||||
Ringo.Types.Internal,
|
Ringo.Types.Internal,
|
||||||
Ringo.Utils
|
Ringo.Utils
|
||||||
build-depends: base >=4.7 && <5,
|
build-depends: base >=4.7 && <5,
|
||||||
|
|
|
@ -9,16 +9,17 @@
|
||||||
module Ringo.Generator.Populate.Fact
|
module Ringo.Generator.Populate.Fact
|
||||||
( factTablePopulationSQL
|
( factTablePopulationSQL
|
||||||
, factTablePopulationStatements
|
, factTablePopulationStatements
|
||||||
|
, ilog2FunctionString
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
import Prelude.Compat
|
import Prelude.Compat
|
||||||
import Control.Monad.Reader (Reader, asks)
|
import Control.Monad.Reader (Reader, asks)
|
||||||
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
|
import Database.HsSqlPpp.Syntax ( QueryExpr(..), ScalarExpr, Statement, makeSelect, NameComponent
|
||||||
, SelectList(..), SelectItem(..), JoinType(..) )
|
, JoinType(..) )
|
||||||
import Data.List (nub)
|
import Data.List (nub)
|
||||||
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
|
import Data.Maybe (fromJust, listToMaybe)
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Text.RawString.QQ (r)
|
import Text.RawString.QQ (r)
|
||||||
|
@ -26,6 +27,7 @@ import Text.RawString.QQ (r)
|
||||||
import Ringo.Extractor.Internal
|
import Ringo.Extractor.Internal
|
||||||
import Ringo.Generator.Internal
|
import Ringo.Generator.Internal
|
||||||
import Ringo.Generator.Sql
|
import Ringo.Generator.Sql
|
||||||
|
import Ringo.Generator.Populate.Fact.CountDistinct
|
||||||
import Ringo.Types.Internal
|
import Ringo.Types.Internal
|
||||||
import Ringo.Utils
|
import Ringo.Utils
|
||||||
|
|
||||||
|
@ -55,105 +57,57 @@ $$
|
||||||
LANGUAGE 'plpgsql' IMMUTABLE;
|
LANGUAGE 'plpgsql' IMMUTABLE;
|
||||||
|]
|
|]
|
||||||
|
|
||||||
factCountDistinctUpdateStatements :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
|
factTablePopulationSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
|
||||||
factCountDistinctUpdateStatements popMode fact groupByColPrefix expr = case expr of
|
factTablePopulationSQL popMode fact = do
|
||||||
Select {selSelectList = SelectList _ origSelectItems, ..} -> do
|
stmts <- factTablePopulationStatements popMode fact
|
||||||
Settings {..} <- asks envSettings
|
return $ case stmts of
|
||||||
tables <- asks envTables
|
[] -> []
|
||||||
let fTableName = factTableName fact
|
[i] -> [ ppStatement i ]
|
||||||
fTable = fromJust . findTable fTableName $ tables
|
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us
|
||||||
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
|
|
||||||
extFactTableName =
|
|
||||||
suffixTableName popMode settingTableNameSuffixTemplate
|
|
||||||
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
|
||||||
|
|
||||||
return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
|
|
||||||
case factColType of
|
|
||||||
FactCountDistinct {factColMaybeSourceColumn = scName} ->
|
|
||||||
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
|
|
||||||
|
|
||||||
bucketSelectCols =
|
|
||||||
[ sia (binop "&" (app "hashtext" [ unqCol ])
|
|
||||||
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
|
|
||||||
(nmc $ cName <> "_bnum")
|
|
||||||
, sia (binop "-"
|
|
||||||
(num "31")
|
|
||||||
(app "ilog2"
|
|
||||||
[ app "min" [ binop "&"
|
|
||||||
(app "hashtext" [ unqCol ])
|
|
||||||
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
|
|
||||||
(nmc $ cName <> "_bhash")
|
|
||||||
]
|
|
||||||
|
|
||||||
groupByCols = map ppScalarExpr selGroupBy
|
|
||||||
selectList =
|
|
||||||
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
|
|
||||||
|
|
||||||
selectStmt =
|
|
||||||
makeSelect
|
|
||||||
{ selSelectList = sl $ selectList ++ bucketSelectCols
|
|
||||||
, selTref = selTref
|
|
||||||
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
|
|
||||||
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
|
|
||||||
}
|
|
||||||
|
|
||||||
aggSelectClause =
|
|
||||||
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
|
|
||||||
|
|
||||||
in Just $ update extFactTableName
|
|
||||||
[ (cName, eqi "xyz" cName) ]
|
|
||||||
[ subtrefa "xyz"
|
|
||||||
makeSelect
|
|
||||||
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
|
|
||||||
, selTref = [ subtrefa "zyx" selectStmt ]
|
|
||||||
, selGroupBy = selGroupBy
|
|
||||||
} ] $
|
|
||||||
foldBinop "and"
|
|
||||||
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
|
|
||||||
(eqi "xyz" col)
|
|
||||||
| col <- groupByCols ]
|
|
||||||
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
_ -> return []
|
|
||||||
where
|
|
||||||
bucketCount :: Double -> Integer
|
|
||||||
bucketCount errorRate =
|
|
||||||
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
|
|
||||||
in ceiling $ 2 ** power
|
|
||||||
|
|
||||||
factTablePopulationStatements :: TablePopulationMode -> Fact -> Reader Env [Statement]
|
factTablePopulationStatements :: TablePopulationMode -> Fact -> Reader Env [Statement]
|
||||||
factTablePopulationStatements popMode fact = do
|
factTablePopulationStatements popMode fact = do
|
||||||
|
Settings {..} <- asks envSettings
|
||||||
allDims <- extractAllDimensionTables fact
|
allDims <- extractAllDimensionTables fact
|
||||||
|
selExprs <- selectExprs popMode fact allDims groupByColPrefix
|
||||||
|
popQueryExpr <- populateQueryExpr popMode fact allDims selExprs groupByColPrefix
|
||||||
|
|
||||||
|
let extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
|
||||||
|
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||||
|
insertIntoStmt = insert extFactTableName (map fst3 selExprs) popQueryExpr
|
||||||
|
|
||||||
|
updateStmts <- factCountDistinctUpdateStatements popMode fact groupByColPrefix popQueryExpr
|
||||||
|
return $ insertIntoStmt : updateStmts
|
||||||
|
where
|
||||||
|
groupByColPrefix = "xxff_"
|
||||||
|
|
||||||
|
selectExprs :: TablePopulationMode
|
||||||
|
-> Fact
|
||||||
|
-> [(Fact, Table)]
|
||||||
|
-> Text
|
||||||
|
-> Reader Env [(ColumnName, (ScalarExpr, NameComponent), Bool)]
|
||||||
|
selectExprs popMode fact allDims groupByColPrefix = do
|
||||||
|
factSelExprs <- factColumnSelectExprs fact
|
||||||
|
dimSelExprs <- dimColumnSelectExprs popMode allDims
|
||||||
|
|
||||||
|
return [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
|
||||||
|
| (cName, expr, addToGroupBy) <- factSelExprs ++ dimSelExprs ]
|
||||||
|
|
||||||
|
factColumnSelectExprs :: Fact -> Reader Env [(ColumnName, ScalarExpr, Bool)]
|
||||||
|
factColumnSelectExprs fact = do
|
||||||
Settings {..} <- asks envSettings
|
Settings {..} <- asks envSettings
|
||||||
tables <- asks envTables
|
tables <- asks envTables
|
||||||
defaults <- asks envTypeDefaults
|
typeDefaults <- asks envTypeDefaults
|
||||||
let fTableName = factTableName fact
|
let fTableName = factTableName fact
|
||||||
fTable = fromJust . findTable fTableName $ tables
|
fTable = fromJust . findTable fTableName $ tables
|
||||||
dimIdColName = settingDimTableIdColumnName
|
dimIdColName = settingDimTableIdColumnName
|
||||||
|
|
||||||
coalesceFKId ex =
|
|
||||||
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
|
|
||||||
|
|
||||||
timeUnitColumnInsertSQL cName =
|
|
||||||
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
|
|
||||||
in ( colName
|
|
||||||
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
|
|
||||||
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
|
|
||||||
"bigint"
|
|
||||||
, True
|
|
||||||
)
|
|
||||||
dimIdColumnInsertSQL cName =
|
|
||||||
let sCol = fromJust . findColumn cName $ tableColumns fTable
|
|
||||||
in (cName, coalesceColumn defaults fTableName sCol, True)
|
|
||||||
|
|
||||||
app' f cName = app f [ eqi fTableName cName ]
|
app' f cName = app f [ eqi fTableName cName ]
|
||||||
|
|
||||||
factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
|
return $ concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
|
||||||
case factColType of
|
case factColType of
|
||||||
DimTime -> [ timeUnitColumnInsertSQL cName ]
|
DimTime -> [ timeUnitColumnSelectExpr fTableName dimIdColName settingTimeUnit cName ]
|
||||||
NoDimId -> [ dimIdColumnInsertSQL cName ]
|
NoDimId -> [ dimIdColumnSelectExpr fTableName fTable typeDefaults cName ]
|
||||||
TenantId -> [ dimIdColumnInsertSQL cName ]
|
TenantId -> [ dimIdColumnSelectExpr fTableName fTable typeDefaults cName ]
|
||||||
FactCount {..} ->
|
FactCount {..} ->
|
||||||
[ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
|
[ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
|
||||||
FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
|
FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
|
||||||
|
@ -166,20 +120,42 @@ factTablePopulationStatements popMode fact = do
|
||||||
]
|
]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
|
||||||
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
|
timeUnitColumnSelectExpr :: TableName -> ColumnName -> TimeUnit -> ColumnName -> (ColumnName, ScalarExpr, Bool)
|
||||||
|
timeUnitColumnSelectExpr fTableName dimIdColName settingTimeUnit cName =
|
||||||
|
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
|
||||||
|
in ( colName
|
||||||
|
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
|
||||||
|
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
|
||||||
|
"bigint"
|
||||||
|
, True
|
||||||
|
)
|
||||||
|
|
||||||
|
dimIdColumnSelectExpr :: TableName -> Table -> TypeDefaults -> ColumnName -> (ColumnName, ScalarExpr, Bool)
|
||||||
|
dimIdColumnSelectExpr fTableName fTable typeDefaults cName =
|
||||||
|
let sCol = fromJust . findColumn cName $ tableColumns fTable
|
||||||
|
in (cName, coalesceColumn typeDefaults fTableName sCol, True)
|
||||||
|
|
||||||
|
dimColumnSelectExprs :: TablePopulationMode -> [(Fact, Table)] -> Reader Env [(ColumnName, ScalarExpr, Bool)]
|
||||||
|
dimColumnSelectExprs popMode allDims = do
|
||||||
|
settings@Settings {..} <- asks envSettings
|
||||||
|
tables <- asks envTables
|
||||||
|
typeDefaults <- asks envTypeDefaults
|
||||||
|
let dimIdColName = settingDimTableIdColumnName
|
||||||
|
|
||||||
|
return $ for allDims $ \(dimFact, factTable@Table {tableName}) -> let
|
||||||
dimFKIdColName =
|
dimFKIdColName =
|
||||||
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
|
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
|
||||||
factSourceTableName = factTableName dimFact
|
factSourceTableName = factTableName dimFact
|
||||||
factSourceTable = fromJust . findTable factSourceTableName $ tables
|
factSourceTable = fromJust . findTable factSourceTableName $ tables
|
||||||
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
|
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
|
||||||
dimLookupWhereClauses = Just . foldBinop "and" $
|
dimLookupWhereClauses = Just . foldBinop "and" $
|
||||||
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
|
[ binop "=" (eqi tableName dimColName) (coalesceColumn typeDefaults factSourceTableName sourceCol)
|
||||||
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
|
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
|
||||||
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
|
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
|
||||||
insertExpr = if factTable `elem` tables -- existing dimension table
|
insertExpr = if factTable `elem` tables -- existing dimension table
|
||||||
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
|
then (if columnNullable dimFKIdColumn == Null then coalesceFKId settings else id)
|
||||||
$ eqi factSourceTableName dimFKIdColName
|
$ eqi factSourceTableName dimFKIdColName
|
||||||
else coalesceFKId . subQueryExp $
|
else coalesceFKId settings . subQueryExp $
|
||||||
makeSelect
|
makeSelect
|
||||||
{ selSelectList = sl [ si $ ei dimIdColName ]
|
{ selSelectList = sl [ si $ ei dimIdColName ]
|
||||||
, selTref =
|
, selTref =
|
||||||
|
@ -187,49 +163,39 @@ factTablePopulationStatements popMode fact = do
|
||||||
, selWhere = dimLookupWhereClauses
|
, selWhere = dimLookupWhereClauses
|
||||||
}
|
}
|
||||||
in (dimFKIdColName, insertExpr, True)
|
in (dimFKIdColName, insertExpr, True)
|
||||||
|
where
|
||||||
|
coalesceFKId Settings {..} ex =
|
||||||
|
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
|
||||||
|
|
||||||
colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
|
populateQueryExpr :: TablePopulationMode
|
||||||
| (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
|
-> Fact
|
||||||
|
-> [(Fact, Table)]
|
||||||
|
-> [(ColumnName, (ScalarExpr, NameComponent), Bool)]
|
||||||
|
-> Text
|
||||||
|
-> Reader Env QueryExpr
|
||||||
|
populateQueryExpr popMode fact allDims selExprs groupByColPrefix = do
|
||||||
|
Settings {..} <- asks envSettings
|
||||||
|
tables <- asks envTables
|
||||||
|
let fTableName = factTableName fact
|
||||||
|
fTable = fromJust . findTable fTableName $ tables
|
||||||
joinClauses =
|
joinClauses =
|
||||||
map (tref &&& joinClausePreds fTable)
|
map (tref &&& joinClausePreds fTable)
|
||||||
. filter (/= fTableName)
|
. filter (/= fTableName)
|
||||||
. nub
|
. nub
|
||||||
. map (factTableName . fst)
|
. map (factTableName . fst)
|
||||||
$ allDims
|
$ allDims
|
||||||
|
|
||||||
timeCol = eqi fTableName $ head [ cName | FactColumn cName DimTime <- factColumns fact ]
|
timeCol = eqi fTableName $ head [ cName | FactColumn cName DimTime <- factColumns fact ]
|
||||||
|
return $ makeSelect
|
||||||
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
|
{ selSelectList = sl . map (uncurry sia . snd3) $ selExprs
|
||||||
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
|
||||||
|
|
||||||
populateSelectExpr =
|
|
||||||
makeSelect
|
|
||||||
{ selSelectList = sl . map (uncurry sia . snd3) $ colMap
|
|
||||||
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
|
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
|
||||||
, selWhere = Just . foldBinop "and" $
|
, selWhere = Just . foldBinop "and" $
|
||||||
binop "<" timeCol placeholder :
|
binop "<" timeCol placeholder :
|
||||||
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
|
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
|
||||||
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
|
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ selExprs
|
||||||
}
|
}
|
||||||
|
|
||||||
insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
|
|
||||||
|
|
||||||
updateStmts <- factCountDistinctUpdateStatements popMode fact groupByColPrefix populateSelectExpr
|
|
||||||
return $ insertIntoStmt : updateStmts
|
|
||||||
where
|
where
|
||||||
groupByColPrefix = "xxff_"
|
|
||||||
|
|
||||||
joinClausePreds table oTableName =
|
joinClausePreds table oTableName =
|
||||||
foldBinop "and"
|
foldBinop "and"
|
||||||
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
|
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
|
||||||
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
|
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
|
||||||
, tName == oTableName ]
|
, tName == oTableName ]
|
||||||
|
|
||||||
factTablePopulationSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
|
|
||||||
factTablePopulationSQL popMode fact = do
|
|
||||||
stmts <- factTablePopulationStatements popMode fact
|
|
||||||
return $ case stmts of
|
|
||||||
[] -> []
|
|
||||||
[i] -> [ ppStatement i ]
|
|
||||||
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us
|
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE NoImplicitPrelude #-}
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
{-# LANGUAGE GADTs #-}
|
||||||
|
|
||||||
|
module Ringo.Generator.Populate.Fact.CountDistinct (factCountDistinctUpdateStatements) where
|
||||||
|
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
|
import Prelude.Compat
|
||||||
|
import Control.Monad (forM)
|
||||||
|
import Control.Monad.Reader (Reader, asks)
|
||||||
|
import Database.HsSqlPpp.Syntax ( QueryExpr(..), ScalarExpr, Statement, makeSelect
|
||||||
|
, SelectList(..), SelectItem(..) )
|
||||||
|
import Data.Maybe (fromJust, fromMaybe, catMaybes)
|
||||||
|
import Data.Monoid ((<>))
|
||||||
|
import Data.Text (Text)
|
||||||
|
|
||||||
|
import Ringo.Extractor.Internal
|
||||||
|
import Ringo.Generator.Internal
|
||||||
|
import Ringo.Generator.Sql
|
||||||
|
import Ringo.Types.Internal
|
||||||
|
|
||||||
|
-- | Generates the @UPDATE@ statements that fill the count-distinct columns of
-- an extracted fact table.
--
-- One statement is produced for every 'FactCountDistinct' column of the fact;
-- all other column types contribute nothing. Each statement aggregates the
-- per-bucket rows built by 'queryExpr' with @json_object_agg@ and joins the
-- result back to the target table on the group-by columns of the given
-- population query. The target column names are recovered by stripping
-- @groupByColPrefix@ from the group-by column names.
--
-- If the given expression is not a @Select@, no statements are generated.
factCountDistinctUpdateStatements :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
factCountDistinctUpdateStatements popMode fact groupByColPrefix expr = case expr of
  select@Select {selGroupBy = groupByExprs} -> do
    Settings {..} <- asks envSettings

    let targetTable =
          suffixTableName popMode settingTableNameSuffixTemplate
            (extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit)

        -- Rendered names of the group-by columns of the population query.
        groupByCols = map ppScalarExpr groupByExprs

        -- Matches each aggregated row ("xyz") with its row in the target table.
        -- 'fromJust' fails if a group-by column does not carry the prefix.
        joinCondition =
          foldBinop "and"
            [ binop "=" (eqi targetTable (fromJust (Text.stripPrefix groupByColPrefix col)))
                        (eqi "xyz" col)
            | col <- groupByCols ]

    fmap catMaybes $ forM (factColumns fact) $ \FactColumn {factColTargetColumn = cName, factColType = colType} ->
      case colType of
        FactCountDistinct {factColMaybeSourceColumn = scName} -> do
          bucketQuery <- queryExpr fact cName scName groupByCols select

          let bucketAgg =
                sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)

              -- Collapses the per-bucket rows ("zyx") into one JSON object per group.
              aggregatedQuery =
                makeSelect
                  { selSelectList = sl (map (si . ei) groupByCols ++ [ bucketAgg ])
                  , selTref       = [ subtrefa "zyx" bucketQuery ]
                  , selGroupBy    = groupByExprs
                  }

          return . Just $
            update targetTable
                   [ (cName, eqi "xyz" cName) ]
                   [ subtrefa "xyz" aggregatedQuery ]
                   joinCondition

        _ -> return Nothing

  _ -> return []
|
||||||
|
|
||||||
|
-- | Builds the per-bucket select query for one count-distinct column.
--
-- The query is derived from the given population @Select@:
--
-- * it keeps the original select items whose alias is one of @groupByCols@,
-- * it adds the bucket number and bucket hash items from 'bucketSelectItems',
-- * it reads from the same table references,
-- * it restricts rows to those where the counted column is not null, and
-- * it groups by the original group-by expressions plus the bucket number.
--
-- The counted column is @sourceCol@ of the fact's source table, or that
-- table's primary key column when @sourceCol@ is 'Nothing'. It is cast to
-- @text@ before use.
--
-- Calls 'error' if the expression is not a @Select@, if the fact's table is
-- not in the environment, or if a primary key is needed but the table has none.
queryExpr :: Fact -> ColumnName -> Maybe ColumnName -> [ColumnName] -> QueryExpr -> Reader Env QueryExpr
queryExpr fact targetCol sourceCol groupByCols select = case select of
  Select {selSelectList = SelectList _ origSelectItems, ..} -> do
    tables <- asks envTables

    let fTableName = factTableName fact

        fTable =
          fromMaybe (error "queryExpr: fact table not found in the environment")
            . findTable fTableName $ tables

        -- Only evaluated when no explicit source column is given.
        tablePKColName =
          case [ cName | PrimaryKey cName <- tableConstraints fTable ] of
            pk : _ -> pk
            []     -> error "queryExpr: fact table has no primary key to count distinct on"

        unqCol = cast (eqi fTableName (fromMaybe tablePKColName sourceCol)) "text"

        notNullPred = postop "isnotnull" unqCol

        groupByAliases = map nmc groupByCols
        selectList     = [ i | i@(SelectItem _ _ a) <- origSelectItems, a `elem` groupByAliases ]

    bucketSelectList <- bucketSelectItems targetCol unqCol

    return $ makeSelect
      { selSelectList = sl $ selectList ++ bucketSelectList
      , selTref       = selTref
        -- Always filter out null values, even when the original query has no
        -- WHERE clause of its own.
      , selWhere      = Just $ maybe notNullPred (binop "and" notNullPred) selWhere
      , selGroupBy    = selGroupBy ++ [ ei $ targetCol <> "_bnum" ]
      }

  _ -> error "Must be a Select"
|
||||||
|
|
||||||
|
-- | Builds the two select items used to bucket the values of a count-distinct
-- column.
--
-- * @\<targetCol\>_bnum@: @hashtext(unqCol) & (bucketCount - 1)@
-- * @\<targetCol\>_bhash@: @31 - ilog2(min(hashtext(unqCol) & ~(1 << 31)))@
--
-- The bucket count is derived from the @settingFactCountDistinctErrorRate@
-- setting: it is 2 raised to the ceiling of @log2 ((1.04 / errorRate) ^ 2)@.
-- NOTE(review): this looks like HyperLogLog-style bucketing; confirm.
bucketSelectItems :: ColumnName -> ScalarExpr -> Reader Env [SelectItem]
bucketSelectItems targetCol unqCol = do
  errorRate <- asks (settingFactCountDistinctErrorRate . envSettings)

  let hashed       = app "hashtext" [ unqCol ]
      bucketMask   = num . Text.pack . show $ bucketCount errorRate - 1
      bucketNumber = binop "&" hashed bucketMask

      -- The hash with bit 31 masked off.
      maskedHash   = binop "&" hashed (prefop "~" (parens (binop "<<" (num "1") (num "31"))))
      bucketHash   = binop "-" (num "31") (app "ilog2" [ app "min" [ maskedHash ] ])

  return
    [ sia bucketNumber (nmc $ targetCol <> "_bnum")
    , sia bucketHash   (nmc $ targetCol <> "_bhash")
    ]
  where
    -- Number of buckets for the given error rate; always a power of two.
    bucketCount :: Double -> Integer
    bucketCount rate = ceiling (2 ** pow)
      where
        pow :: Double
        pow = fromIntegral (ceiling (logBase 2 ((1.04 / rate) ** 2)) :: Integer)
|
Loading…
Reference in New Issue