Changes fact populatation generator to use hssqlppp internally.

pull/1/head
Abhinav Sarkar 2016-01-25 22:13:47 +05:30
parent d33dcbe33c
commit 46e5e43856
5 changed files with 149 additions and 154 deletions

View File

@ -47,8 +47,8 @@ tableDefnStmts Table {..} = do
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppSQL <$> tableDefnStmts table
is <- map (\st -> ppSQL st <> ";\n") <$> indexFn table
ds <- map ppStatement <$> tableDefnStmts table
is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table
return $ ds ++ is
dimensionTableDefnSQL :: Table -> Reader Env [Text]

View File

@ -14,23 +14,14 @@ import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
joinColumnNames = Text.intercalate ",\n"
fullColumnName :: TableName -> ColumnName -> ColumnName
fullColumnName tName cName = tName <> "." <> cName
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
[ (dimColumnName dName cName, cName)
| DimVal dName cName <- factColumns fact
, dimPrefix <> dName == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName
coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn' defaults tName Column{..} =
coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn defaults tName Column{..} =
if columnNullable == Null
then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName

View File

@ -20,16 +20,16 @@ import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName =
ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName
ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName
dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateSQL' popMode fact dimTableName = do
dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateStmt popMode fact dimTableName = do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col
selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col
| (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ]

View File

@ -14,14 +14,17 @@ import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
, SelectList(..), SelectItem(..), JoinType(..) )
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
@ -48,18 +51,12 @@ BEGIN
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE|]
LANGUAGE 'plpgsql' IMMUTABLE;
|]
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
{ ftpsSelectCols :: ![(Text, Text)]
, ftpsSelectTable :: !Text
, ftpsJoinClauses :: ![Text]
, ftpsWhereClauses :: ![Text]
, ftpsGroupByCols :: ![Text]
} deriving (Show, Eq)
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text]
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do
factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
factCountDistinctUpdateStmts
popMode fact groupByColPrefix ~Select {selSelectList = SelectList _ origSelectItems, ..} = do
Settings {..} <- asks envSettings
tables <- asks envTables
let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact]
@ -70,50 +67,57 @@ factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopu
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
return $ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
bucketSelectCols =
[ ( "hashtext(" <> unqCol <> ") & "
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
, cName <> "_bnum"
)
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
, cName <> "_bhash"
)
[ sia (binop "&" (app "hashtext" [ unqCol ])
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
selectSQL = toSelectSQL $
populateSelectSQL
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
groupByCols = map ppScalarExpr selGroupBy
selectList =
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
selectStmt =
makeSelect
{ selSelectList = sl $ selectList ++ bucketSelectCols
, selTref = selTref
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause =
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in "UPDATE " <> extFactTableName
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
<> "\nFROM ("
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
<> "\n) xyz"
<> "\n WHERE\n"
<> Text.intercalate "\nAND "
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
<> " = " <> fullColumnName "xyz" col
| col <- ftpsGroupByCols ]
in update extFactTableName
[ (cName, eqi "xyz" cName) ]
[ subtrefa "xyz"
makeSelect
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
, selTref = [ subtrefa "zyx" selectStmt ]
, selGroupBy = selGroupBy
} ] $
foldBinop "and"
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
(eqi "xyz" col)
| col <- groupByCols ]
where
bucketCount :: Double -> Integer
bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateStmts popMode fact = do
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
@ -122,45 +126,37 @@ factTablePopulateSQL popMode fact = do
fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
coalesceFKId col =
if "coalesce" `Text.isPrefixOf` col
then col
else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"
coalesceFKId ex =
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName
, "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint"
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
app' f cName = app f [ eqi fTableName cName ]
factColMap = concatFor (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ]
TenantId cName -> [ dimIdColumnInsertSQL cName ]
FactCount scName cName ->
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
FactSum scName cName ->
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
FactMax scName cName ->
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ]
FactMin scName cName ->
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ]
[ (cName, app "count" [ maybe star (eqi fTableName) scName ], False) ]
FactSum scName cName -> [ (cName, app' "sum" scName, False) ]
FactMax scName cName -> [ (cName, app' "max" scName, False) ]
FactMin scName cName -> [ (cName, app' "min" scName, False) ]
FactAverage scName cName ->
[ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColumnName fTableName scName <> ")"
, False
)
, ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColumnName fTableName scName <> ")"
, False
)
[ ( cName <> settingAvgCountColumSuffix, app' "count" scName, False )
, ( cName <> settingAvgSumColumnSuffix , app' "sum" scName , False)
]
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
FactCountDistinct _ cName -> [ (cName, cast (str "{}") "json", False) ]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
@ -168,70 +164,64 @@ factTablePopulateSQL popMode fact = do
factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses =
[ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol
dimLookupWhereClauses = Just . foldBinop "and" $
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertSQL = if factTable `elem` tables -- existing dimension table
insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ fullColumnName factSourceTableName dimFKIdColName
else "SELECT " <> dimIdColName <> " FROM "
<> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName
<> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses
in (dimFKIdColName, coalesceFKId insertSQL, True)
$ eqi factSourceTableName dimFKIdColName
else coalesceFKId . subQueryExp $
makeSelect
{ selSelectList = sl [ si $ ei dimIdColName ]
, selTref =
[ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ]
, selWhere = dimLookupWhereClauses
}
in (dimFKIdColName, insertExpr, True)
colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy)
| (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ]
colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
| (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName)
map (tref &&& joinClausePreds fTable)
. filter (/= fTableName)
. nub
. map (factTableName . fst)
$ allDims
timeCol = fullColumnName fTableName $ head [ cName | DimTime cName <- factColumns fact ]
timeCol = eqi fTableName $ head [ cName | DimTime cName <- factColumns fact ]
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
populateSelectSQL =
FactTablePopulateSelectSQL
{ ftpsSelectCols = map snd3 colMap
, ftpsSelectTable = fTableName
, ftpsJoinClauses = joinClauses
, ftpsWhereClauses =
timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ]
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap
populateSelectExpr =
makeSelect
{ selSelectList = sl . map (uncurry sia . snd3) $ colMap
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
, selWhere = Just . foldBinop "and" $
binop "<" timeCol placeholder :
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
}
insertIntoSQL = "INSERT INTO " <> extFactTableName
<> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)\n"
<> toSelectSQL populateSelectSQL
insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL
return $ insertIntoSQL : updateSQLs
updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr
return $ insertIntoStmt : updateStmts
where
groupByColPrefix = "xxff_"
joinClausePreds table oTableName =
Text.intercalate " AND "
. map (\(c1, c2) -> fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2)
foldBinop "and"
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
, tName == oTableName ]
toSelectSQL :: FactTablePopulateSelectSQL -> Text
toSelectSQL FactTablePopulateSelectSQL {..} =
"SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols)
<> "\nFROM " <> ftpsSelectTable
<> (if not . null $ ftpsJoinClauses
then "\n" <> Text.intercalate "\n" ftpsJoinClauses
else "")
<> (if not . null $ ftpsWhereClauses
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses
else "")
<> "\nGROUP BY \n"
<> joinColumnNames ftpsGroupByCols
where
asName sql alias = "(" <> sql <> ")" <> " as " <> alias
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
stmts <- factTablePopulateStmts popMode fact
return $ case stmts of
[] -> []
[i] -> [ ppStatement i ]
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us

View File

@ -5,7 +5,7 @@ import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import Database.HsSqlPpp.Annotation
import Database.HsSqlPpp.Dialect (postgresDialect)
import Database.HsSqlPpp.Dialect
import Database.HsSqlPpp.Pretty
import Database.HsSqlPpp.Syntax
import Data.Text (Text)
@ -24,28 +24,31 @@ attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member a b = BinaryOp ea (name ".") a b
member = BinaryOp ea (name ".")
num :: Text -> ScalarExpr
num n = NumberLit ea $ Text.unpack n
num = NumberLit ea . Text.unpack
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
app :: Text -> [ScalarExpr] -> ScalarExpr
app n as = App ea (name n) as
extEpoch :: ScalarExpr -> ScalarExpr
extEpoch = Extract ea ExtractEpoch
specop :: Text -> [ScalarExpr] -> ScalarExpr
specop n as = SpecialOp ea (name n) as
app :: Text -> [ScalarExpr] -> ScalarExpr
app n = App ea (name n)
cast :: ScalarExpr -> Text -> ScalarExpr
cast ex = Cast ea ex . SimpleTypeName ea . name
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n a = PrefixOp ea (name n) a
prefop n = PrefixOp ea (name n)
postop :: Text -> ScalarExpr -> ScalarExpr
postop n a = PostfixOp ea (name n) a
postop n = PostfixOp ea (name n)
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n a0 a1 = BinaryOp ea (name n) a0 a1
binop n = BinaryOp ea (name n)
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
@ -60,9 +63,15 @@ parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
star :: ScalarExpr
star = Star ea
subQueryExp :: QueryExpr -> ScalarExpr
subQueryExp = ScalarSubQuery ea
-- Table ref
tref :: Text -> TableRef
tref s = Tref ea (name s)
tref = Tref ea . name
-- Table ref alias
trefa :: Text -> Text -> TableRef
@ -82,11 +91,11 @@ si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia e a = SelectItem ea e a
sia = SelectItem ea
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c x = Identifier ea $ qn c x
eqi c = Identifier ea . qn c
-- Expression identifier
ei :: Text -> ScalarExpr
@ -100,15 +109,20 @@ qn c n = Name ea [nmc c, nmc n]
sl :: [SelectItem] -> SelectList
sl = SelectList ea
-- Insert statement
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
ppSQL :: Statement -> Text
ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Update statement
update :: Text -> [(Text, ScalarExpr)] -> [TableRef] -> ScalarExpr -> Statement
update tName setClauseList fromList whr =
Update ea (name tName) (map (uncurry (SetClause ea . nmc)) setClauseList) fromList (Just whr) Nothing
-- Pretty print statement
ppStatement :: Statement -> Text
ppStatement st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Pretty print scalar expression
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)
ppQueryExpr :: QueryExpr -> Text
ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect)