|
|
|
@ -1,3 +1,4 @@
|
|
|
|
|
{-# LANGUAGE QuasiQuotes #-}
|
|
|
|
|
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
|
|
|
|
|
|
|
|
|
|
import qualified Data.Text as Text
|
|
|
|
@ -12,12 +13,38 @@ import Data.List (nub)
|
|
|
|
|
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
|
|
|
|
|
import Data.Monoid ((<>))
|
|
|
|
|
import Data.Text (Text)
|
|
|
|
|
import Text.RawString.QQ (r)
|
|
|
|
|
|
|
|
|
|
import Ringo.Extractor.Internal
|
|
|
|
|
import Ringo.Generator.Internal
|
|
|
|
|
import Ringo.Types
|
|
|
|
|
import Ringo.Utils
|
|
|
|
|
|
|
|
|
|
-- | SQL text that (re)creates a PL/pgSQL helper function @ilog2(v integer)@
-- returning @integer@, declared @IMMUTABLE@.
--
-- The function body narrows down the position of the highest set bit of @v@
-- in four compare-and-shift steps (against @x'FFFF'@, @x'FF'@, @x'F'@ and
-- @x'3'@), accumulating the shift amounts in @r@ and finishing with
-- @r | (v >> 1)@. For a positive @v@ that yields @floor(log2(v))@; for
-- @v = 0@ every comparison fails and the result is @0@.
--
-- The whole definition is a raw-string quasi-quote, so every character
-- between the quasi-quote delimiters is emitted verbatim; nothing inside it
-- can be commented without changing the generated SQL.
--
-- NOTE(review): in the @x'3'@ step the else-branch reads @shift := 0 << 3@
-- while the then-branch uses @1 << 1@. Both else forms evaluate to 0, so the
-- result is unaffected, but it looks like a copy-paste slip for @0 << 1@.
--
-- NOTE(review): the body assigns to the input parameter @v@; presumably this
-- requires a PostgreSQL version that permits assigning to PL/pgSQL input
-- parameters -- confirm against the supported server versions.
ilog2FunctionString :: Text
|
|
|
|
|
ilog2FunctionString = [r|CREATE OR REPLACE FUNCTION ilog2(v integer)
|
|
|
|
|
RETURNS integer AS
|
|
|
|
|
$$
|
|
|
|
|
DECLARE
|
|
|
|
|
r integer;
|
|
|
|
|
shift integer;
|
|
|
|
|
BEGIN
|
|
|
|
|
IF v > x'FFFF'::integer THEN r := 1 << 4; ELSE r := 0 << 4; END IF;
|
|
|
|
|
v := v >> r;
|
|
|
|
|
IF v > x'FF'::integer THEN shift := 1 << 3; ELSE shift := 0 << 3; END IF;
|
|
|
|
|
v := v >> shift;
|
|
|
|
|
r := r | shift;
|
|
|
|
|
IF v > x'F'::integer THEN shift := 1 << 2; ELSE shift := 0 << 2; END IF;
|
|
|
|
|
v := v >> shift;
|
|
|
|
|
r := r | shift;
|
|
|
|
|
IF v > x'3'::integer THEN shift := 1 << 1; ELSE shift := 0 << 3; END IF;
|
|
|
|
|
v := v >> shift;
|
|
|
|
|
r := r | shift;
|
|
|
|
|
r := r | (v >> 1);
|
|
|
|
|
RETURN r;
|
|
|
|
|
END;
|
|
|
|
|
$$
|
|
|
|
|
LANGUAGE 'plpgsql' IMMUTABLE|]
|
|
|
|
|
|
|
|
|
|
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
|
|
|
|
|
{ ftpsSelectCols :: ![(Text, Text)]
|
|
|
|
|
, ftpsSelectTable :: !Text
|
|
|
|
@ -37,41 +64,42 @@ factTableUpdateSQL fact groupByColPrefix populateSelectSQL@FactTablePopulateSele
|
|
|
|
|
extFactTableName =
|
|
|
|
|
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
|
|
|
|
|
|
|
|
|
return $ for countDistinctCols $ \(FactCountDistinct scName cName) ->
|
|
|
|
|
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
|
|
|
|
|
|
|
|
|
|
bucketSelectCols =
|
|
|
|
|
[ ( "hashtext(" <> unqCol <> ") & "
|
|
|
|
|
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
|
|
|
|
|
, cName <> "_bnum"
|
|
|
|
|
)
|
|
|
|
|
, ( "31 - floor(log(2, min(hashtext(" <> unqCol <> ") & ~(1 << 31))))::int"
|
|
|
|
|
, cName <> "_bhash"
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
selectSQL = toSelectSQL $
|
|
|
|
|
populateSelectSQL
|
|
|
|
|
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
|
|
|
|
|
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
|
|
|
|
|
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aggSelectClause =
|
|
|
|
|
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
|
|
|
|
|
|
|
|
|
|
in "UPDATE " <> extFactTableName
|
|
|
|
|
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
|
|
|
|
|
<> "\nFROM ("
|
|
|
|
|
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
|
|
|
|
|
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
|
|
|
|
|
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
|
|
|
|
|
<> "\n) xyz"
|
|
|
|
|
<> "\n WHERE\n"
|
|
|
|
|
<> Text.intercalate "\nAND "
|
|
|
|
|
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
|
|
|
|
|
<> " = " <> fullColumnName "xyz" col
|
|
|
|
|
| col <- ftpsGroupByCols ]
|
|
|
|
|
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
|
|
|
|
|
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
|
|
|
|
|
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
|
|
|
|
|
|
|
|
|
|
bucketSelectCols =
|
|
|
|
|
[ ( "hashtext(" <> unqCol <> ") & "
|
|
|
|
|
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
|
|
|
|
|
, cName <> "_bnum"
|
|
|
|
|
)
|
|
|
|
|
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
|
|
|
|
|
, cName <> "_bhash"
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
selectSQL = toSelectSQL $
|
|
|
|
|
populateSelectSQL
|
|
|
|
|
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
|
|
|
|
|
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
|
|
|
|
|
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aggSelectClause =
|
|
|
|
|
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
|
|
|
|
|
|
|
|
|
|
in "UPDATE " <> extFactTableName
|
|
|
|
|
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
|
|
|
|
|
<> "\nFROM ("
|
|
|
|
|
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
|
|
|
|
|
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
|
|
|
|
|
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
|
|
|
|
|
<> "\n) xyz"
|
|
|
|
|
<> "\n WHERE\n"
|
|
|
|
|
<> Text.intercalate "\nAND "
|
|
|
|
|
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
|
|
|
|
|
<> " = " <> fullColumnName "xyz" col
|
|
|
|
|
| col <- ftpsGroupByCols ]
|
|
|
|
|
where
|
|
|
|
|
bucketCount :: Double -> Integer
|
|
|
|
|
bucketCount errorRate =
|
|
|
|
|