Changes dimension populatation generator to use hssqlppp internally.

pull/1/head
Abhinav Sarkar 2016-01-04 01:32:36 +05:30
parent 4fe1006d0c
commit 6a107aaf8d
6 changed files with 265 additions and 42 deletions

View File

@ -56,7 +56,7 @@ writeFiles outputDir env@Env{..} = do
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ] dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ] factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ tableDefnSQL env table) dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table)
| (_, tabs) <- dimTables | (_, tabs) <- dimTables
, table <- tabs , table <- tabs
, table `notElem` envTables ] , table `notElem` envTables ]

View File

@ -9,7 +9,7 @@ module Ringo
, extractFactTable , extractFactTable
, extractDimensionTables , extractDimensionTables
, extractDependencies , extractDependencies
, tableDefnSQL , dimensionTableDefnSQL
, factTableDefnSQL , factTableDefnSQL
, dimensionTablePopulateSQL , dimensionTablePopulateSQL
, factTablePopulateSQL , factTablePopulateSQL
@ -149,7 +149,7 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- | -- |
-- --
-- >>> let storySessionDimTables = extractDimensionTables env sessionFact -- >>> let storySessionDimTables = extractDimensionTables env sessionFact
-- >>> let sqls = map (tableDefnSQL env) storySessionDimTables -- >>> let sqls = map (dimensionTableDefnSQL env) storySessionDimTables
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls -- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
-- create table dim_geo ( -- create table dim_geo (
-- id serial not null, -- id serial not null,
@ -189,8 +189,8 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- device); -- device);
-- <BLANKLINE> -- <BLANKLINE>
-- -------- -- --------
tableDefnSQL :: Env -> Table -> [Text] dimensionTableDefnSQL :: Env -> Table -> [Text]
tableDefnSQL env = flip runReader env . G.tableDefnSQL dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL
-- | -- |
-- --
@ -222,6 +222,129 @@ tableDefnSQL env = flip runReader env . G.tableDefnSQL
factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- |
--
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name,
-- city_name,
-- continent_name,
-- most_specific_subdivision_name,
-- time_zone)
-- select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name,
-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name,
-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name, type, device)
-- select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name,
-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type,
-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name,
-- city_name,
-- continent_name,
-- most_specific_subdivision_name,
-- time_zone)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name,
-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name,
-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- and
-- created_at > ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
-- and
-- dim_geo.city_name = x.geo_city_name
-- and
-- dim_geo.continent_name = x.geo_continent_name
-- and
-- dim_geo.most_specific_subdivision_name = x.geo_most_specific_subdivision_name
-- and
-- dim_geo.time_zone = x.geo_time_zone
-- where
-- dim_geo.country_name is null and dim_geo.city_name is null
-- and
-- dim_geo.continent_name is null
-- and
-- dim_geo.most_specific_subdivision_name is null
-- and
-- dim_geo.time_zone is null
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name, type, device)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name,
-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type,
-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- and
-- created_at > ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name
-- and
-- dim_user_agent.os = x.os
-- and
-- dim_user_agent.name = x.user_agent_name
-- and
-- dim_user_agent.type = x.user_agent_type
-- and
-- dim_user_agent.device = x.user_agent_device
-- where
-- dim_user_agent.browser_name is null and dim_user_agent.os is null
-- and
-- dim_user_agent.name is null
-- and
-- dim_user_agent.type is null
-- and
-- dim_user_agent.device is null
-- ;
-- <BLANKLINE>
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact = dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact flip runReader env . G.dimensionTablePopulateSQL popMode fact

View File

@ -31,7 +31,7 @@ tableDefnSQL' Table {..} = do
tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
columnDefnSQL Column {..} = columnDefnSQL Column {..} =
att columnName columnType $ nullableDefnSQL columnNullable attDef columnName columnType $ nullableDefnSQL columnNullable
nullableDefnSQL Null = NullConstraint ea "" nullableDefnSQL Null = NullConstraint ea ""
nullableDefnSQL NotNull = NotNullConstraint ea "" nullableDefnSQL NotNull = NotNullConstraint ea ""

View File

@ -5,11 +5,13 @@ module Ringo.Generator.Internal where
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find) import Data.List (find)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.Text (Text) import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
joinColumnNames :: [ColumnName] -> Text joinColumnNames :: [ColumnName] -> Text
@ -25,12 +27,15 @@ dimColumnMapping dimPrefix fact dimTableName =
, dimPrefix <> dName == dimTableName ] , dimPrefix <> dName == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn defaults tName Column{..} = coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName
coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn' defaults tName Column{..} =
if columnNullable == Null if columnNullable == Null
then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")" then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName else fqColName
where where
fqColName = fullColumnName tName columnName fqColName = eqi tName columnName
defVal colType = defVal colType =
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd

View File

@ -3,51 +3,58 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
import qualified Data.Text as Text #if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.Maybe (fromJust) import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..))
import Data.Monoid ((<>)) import Data.Maybe (fromJust)
import Data.Text (Text) import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Internal import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do dimensionTablePopulateSQL popMode fact dimTableName =
ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName
dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateSQL' popMode fact dimTableName = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
tables <- asks envTables tables <- asks envTables
defaults <- asks envTypeDefaults defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col
| (_, cName) <- colMapping | (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ] , let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ] timeCol = head [ cName | DimTime cName <- factColumns fact ]
baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
<> "\nFROM " <> factTableName fact selectWhereC = Just . foldBinop "and" $
baseWhereCs = [ "(\n" [ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ] [ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
<> "\n)" selectC = makeSelect
, timeCol <> " <= ?" { selDistinct = Distinct
] , selSelectList = sl selectCols
, selTref = [tref $ factTableName fact]
, selWhere = selectWhereC
}
insertC selectC whereCs = iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName
"INSERT INTO " insertC = insert iTableName (map fst colMapping) $ case popMode of
<> suffixTableName popMode settingTableNameSuffixTemplate dimTableName FullPopulation -> selectC
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) " IncrementalPopulation -> let alias = "x" in
<> "SELECT x.* FROM (\n" makeSelect
<> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs { selSelectList = sl [si $ qstar alias]
<> ") x" , selTref =
[ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $
foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ]
, selWhere =
Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping
}
return $ case popMode of return insertC
FullPopulation -> insertC baseSelectC baseWhereCs
IncrementalPopulation ->
insertC baseSelectC (baseWhereCs ++ [ timeCol <> " > ?" ])
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]

View File

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo.Generator.Sql where module Ringo.Generator.Sql where
import qualified Data.Text as Text import qualified Data.Text as Text
@ -18,9 +19,96 @@ name n = Name ea [nmc n]
nmc :: Text -> NameComponent nmc :: Text -> NameComponent
nmc = Nmc . Text.unpack nmc = Nmc . Text.unpack
att :: Text -> Text -> RowConstraint -> AttributeDef attDef :: Text -> Text -> RowConstraint -> AttributeDef
att nam typ constr = attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr] AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member a b = BinaryOp ea (name ".") a b
num :: Text -> ScalarExpr
num n = NumberLit ea $ Text.unpack n
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
app :: Text -> [ScalarExpr] -> ScalarExpr
app n as = App ea (name n) as
specop :: Text -> [ScalarExpr] -> ScalarExpr
specop n as = SpecialOp ea (name n) as
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n a = PrefixOp ea (name n) a
postop :: Text -> ScalarExpr -> ScalarExpr
postop n a = PostfixOp ea (name n) a
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n a0 a1 = BinaryOp ea (name n) a0 a1
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
foldBinop n (a : as) = foldl (binop n) a as
placeholder :: ScalarExpr
placeholder = Placeholder ea
parens :: ScalarExpr -> ScalarExpr
parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
-- Table ref
tref :: Text -> TableRef
tref s = Tref ea (name s)
-- Table ref alias
trefa :: Text -> Text -> TableRef
trefa t a = TableAlias ea (nmc a) $ Tref ea (name t)
-- Subquery Table ref alias
subtrefa :: Text -> QueryExpr -> TableRef
subtrefa a = TableAlias ea (nmc a) . SubTref ea
-- Table join
tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef
tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on)
-- Select item
si :: ScalarExpr -> SelectItem
si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia e a = SelectItem ea e a
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c x = Identifier ea $ qn c x
-- Expression identifier
ei :: Text -> ScalarExpr
ei = Identifier ea . name
-- Qualified name
qn :: Text -> Text -> Name
qn c n = Name ea [nmc c, nmc n]
-- Select list
sl :: [SelectItem] -> SelectList
sl = SelectList ea
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
ppSQL :: Statement -> Text ppSQL :: Statement -> Text
ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)
ppQueryExpr :: QueryExpr -> Text
ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect)