Changes fact populatation generator to use hssqlppp internally.

pull/1/head
Abhinav Sarkar 7 years ago
parent d33dcbe33c
commit 46e5e43856
  1. 4
      src/Ringo/Generator/Create.hs
  2. 19
      src/Ringo/Generator/Internal.hs
  3. 8
      src/Ringo/Generator/Populate/Dimension.hs
  4. 224
      src/Ringo/Generator/Populate/Fact.hs
  5. 52
      src/Ringo/Generator/Sql.hs

@ -47,8 +47,8 @@ tableDefnStmts Table {..} = do
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppSQL <$> tableDefnStmts table
is <- map (\st -> ppSQL st <> ";\n") <$> indexFn table
ds <- map ppStatement <$> tableDefnStmts table
is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table
return $ ds ++ is
dimensionTableDefnSQL :: Table -> Reader Env [Text]

@ -6,31 +6,22 @@ import qualified Data.Map as Map
import qualified Data.Text as Text
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
joinColumnNames = Text.intercalate ",\n"
fullColumnName :: TableName -> ColumnName -> ColumnName
fullColumnName tName cName = tName <> "." <> cName
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
[ (dimColumnName dName cName, cName)
| DimVal dName cName <- factColumns fact
, dimPrefix <> dName == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName
coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn' defaults tName Column{..} =
coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn defaults tName Column{..} =
if columnNullable == Null
then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName

@ -20,16 +20,16 @@ import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName =
ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName
ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName
dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateSQL' popMode fact dimTableName = do
dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateStmt popMode fact dimTableName = do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col
selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col
| (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ]

@ -10,18 +10,21 @@ import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
, SelectList(..), SelectItem(..), JoinType(..) )
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
@ -48,18 +51,12 @@ BEGIN
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE|]
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
{ ftpsSelectCols :: ![(Text, Text)]
, ftpsSelectTable :: !Text
, ftpsJoinClauses :: ![Text]
, ftpsWhereClauses :: ![Text]
, ftpsGroupByCols :: ![Text]
} deriving (Show, Eq)
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text]
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do
LANGUAGE 'plpgsql' IMMUTABLE;
|]
factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
factCountDistinctUpdateStmts
popMode fact groupByColPrefix ~Select {selSelectList = SelectList _ origSelectItems, ..} = do
Settings {..} <- asks envSettings
tables <- asks envTables
let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact]
@ -70,50 +67,57 @@ factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopu
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
return $ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
bucketSelectCols =
[ ( "hashtext(" <> unqCol <> ") & "
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
, cName <> "_bnum"
)
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
, cName <> "_bhash"
)
[ sia (binop "&" (app "hashtext" [ unqCol ])
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
selectSQL = toSelectSQL $
populateSelectSQL
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
groupByCols = map ppScalarExpr selGroupBy
selectList =
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
selectStmt =
makeSelect
{ selSelectList = sl $ selectList ++ bucketSelectCols
, selTref = selTref
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause =
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
in "UPDATE " <> extFactTableName
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
<> "\nFROM ("
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
<> "\n) xyz"
<> "\n WHERE\n"
<> Text.intercalate "\nAND "
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
<> " = " <> fullColumnName "xyz" col
| col <- ftpsGroupByCols ]
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in update extFactTableName
[ (cName, eqi "xyz" cName) ]
[ subtrefa "xyz"
makeSelect
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
, selTref = [ subtrefa "zyx" selectStmt ]
, selGroupBy = selGroupBy
} ] $
foldBinop "and"
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
(eqi "xyz" col)
| col <- groupByCols ]
where
bucketCount :: Double -> Integer
bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateStmts popMode fact = do
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
@ -122,116 +126,102 @@ factTablePopulateSQL popMode fact = do
fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
coalesceFKId col =
if "coalesce" `Text.isPrefixOf` col
then col
else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"
coalesceFKId ex =
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName
, "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint"
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
app' f cName = app f [ eqi fTableName cName ]
factColMap = concatFor (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ]
TenantId cName -> [ dimIdColumnInsertSQL cName ]
FactCount scName cName ->
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
FactSum scName cName ->
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
FactMax scName cName ->
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ]
FactMin scName cName ->
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ]
[ (cName, app "count" [ maybe star (eqi fTableName) scName ], False) ]
FactSum scName cName -> [ (cName, app' "sum" scName, False) ]
FactMax scName cName -> [ (cName, app' "max" scName, False) ]
FactMin scName cName -> [ (cName, app' "min" scName, False) ]
FactAverage scName cName ->
[ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColumnName fTableName scName <> ")"
, False
)
, ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColumnName fTableName scName <> ")"
, False
)
[ ( cName <> settingAvgCountColumSuffix, app' "count" scName, False )
, ( cName <> settingAvgSumColumnSuffix , app' "sum" scName , False)
]
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
_ -> []
FactCountDistinct _ cName -> [ (cName, cast (str "{}") "json", False) ]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses =
[ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol
dimLookupWhereClauses = Just . foldBinop "and" $
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertSQL = if factTable `elem` tables -- existing dimension table
insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ fullColumnName factSourceTableName dimFKIdColName
else "SELECT " <> dimIdColName <> " FROM "
<> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName
<> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses
in (dimFKIdColName, coalesceFKId insertSQL, True)
colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy)
| (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ]
$ eqi factSourceTableName dimFKIdColName
else coalesceFKId . subQueryExp $
makeSelect
{ selSelectList = sl [ si $ ei dimIdColName ]
, selTref =
[ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ]
, selWhere = dimLookupWhereClauses
}
in (dimFKIdColName, insertExpr, True)
colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
| (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName)
map (tref &&& joinClausePreds fTable)
. filter (/= fTableName)
. nub
. map (factTableName . fst)
$ allDims
timeCol = fullColumnName fTableName $ head [ cName | DimTime cName <- factColumns fact ]
timeCol = eqi fTableName $ head [ cName | DimTime cName <- factColumns fact ]
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
populateSelectSQL =
FactTablePopulateSelectSQL
{ ftpsSelectCols = map snd3 colMap
, ftpsSelectTable = fTableName
, ftpsJoinClauses = joinClauses
, ftpsWhereClauses =
timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ]
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap
populateSelectExpr =
makeSelect
{ selSelectList = sl . map (uncurry sia . snd3) $ colMap
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
, selWhere = Just . foldBinop "and" $
binop "<" timeCol placeholder :
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
}
insertIntoSQL = "INSERT INTO " <> extFactTableName
<> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)\n"
<> toSelectSQL populateSelectSQL
insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL
return $ insertIntoSQL : updateSQLs
updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr
return $ insertIntoStmt : updateStmts
where
groupByColPrefix = "xxff_"
joinClausePreds table oTableName =
Text.intercalate " AND "
. map (\(c1, c2) -> fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2)
foldBinop "and"
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
, tName == oTableName ]
toSelectSQL :: FactTablePopulateSelectSQL -> Text
toSelectSQL FactTablePopulateSelectSQL {..} =
"SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols)
<> "\nFROM " <> ftpsSelectTable
<> (if not . null $ ftpsJoinClauses
then "\n" <> Text.intercalate "\n" ftpsJoinClauses
else "")
<> (if not . null $ ftpsWhereClauses
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses
else "")
<> "\nGROUP BY \n"
<> joinColumnNames ftpsGroupByCols
where
asName sql alias = "(" <> sql <> ")" <> " as " <> alias
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
stmts <- factTablePopulateStmts popMode fact
return $ case stmts of
[] -> []
[i] -> [ ppStatement i ]
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us

@ -1,14 +1,14 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo.Generator.Sql where
import qualified Data.Text as Text
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import Database.HsSqlPpp.Annotation
import Database.HsSqlPpp.Dialect (postgresDialect)
import Database.HsSqlPpp.Dialect
import Database.HsSqlPpp.Pretty
import Database.HsSqlPpp.Syntax
import Data.Text (Text)
import Data.Text (Text)
ea :: Annotation
ea = emptyAnnotation
@ -24,28 +24,31 @@ attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member a b = BinaryOp ea (name ".") a b
member = BinaryOp ea (name ".")
num :: Text -> ScalarExpr
num n = NumberLit ea $ Text.unpack n
num = NumberLit ea . Text.unpack
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
extEpoch :: ScalarExpr -> ScalarExpr
extEpoch = Extract ea ExtractEpoch
app :: Text -> [ScalarExpr] -> ScalarExpr
app n as = App ea (name n) as
app n = App ea (name n)
specop :: Text -> [ScalarExpr] -> ScalarExpr
specop n as = SpecialOp ea (name n) as
cast :: ScalarExpr -> Text -> ScalarExpr
cast ex = Cast ea ex . SimpleTypeName ea . name
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n a = PrefixOp ea (name n) a
prefop n = PrefixOp ea (name n)
postop :: Text -> ScalarExpr -> ScalarExpr
postop n a = PostfixOp ea (name n) a
postop n = PostfixOp ea (name n)
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n a0 a1 = BinaryOp ea (name n) a0 a1
binop n = BinaryOp ea (name n)
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
@ -60,9 +63,15 @@ parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
star :: ScalarExpr
star = Star ea
subQueryExp :: QueryExpr -> ScalarExpr
subQueryExp = ScalarSubQuery ea
-- Table ref
tref :: Text -> TableRef
tref s = Tref ea (name s)
tref = Tref ea . name
-- Table ref alias
trefa :: Text -> Text -> TableRef
@ -82,11 +91,11 @@ si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia e a = SelectItem ea e a
sia = SelectItem ea
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c x = Identifier ea $ qn c x
eqi c = Identifier ea . qn c
-- Expression identifier
ei :: Text -> ScalarExpr
@ -100,15 +109,20 @@ qn c n = Name ea [nmc c, nmc n]
sl :: [SelectItem] -> SelectList
sl = SelectList ea
-- Insert statement
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
ppSQL :: Statement -> Text
ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Update statement
update :: Text -> [(Text, ScalarExpr)] -> [TableRef] -> ScalarExpr -> Statement
update tName setClauseList fromList whr =
Update ea (name tName) (map (uncurry (SetClause ea . nmc)) setClauseList) fromList (Just whr) Nothing
-- Pretty print statement
ppStatement :: Statement -> Text
ppStatement st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Pretty print scalar expression
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)
ppQueryExpr :: QueryExpr -> Text
ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect)

Loading…
Cancel
Save