Changes dimension populatation generator to use hssqlppp internally.

pull/1/head
Abhinav Sarkar 2016-01-04 01:32:36 +05:30
parent 4fe1006d0c
commit 6a107aaf8d
6 changed files with 265 additions and 42 deletions

View File

@ -56,7 +56,7 @@ writeFiles outputDir env@Env{..} = do
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ tableDefnSQL env table)
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table)
| (_, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]

View File

@ -9,7 +9,7 @@ module Ringo
, extractFactTable
, extractDimensionTables
, extractDependencies
, tableDefnSQL
, dimensionTableDefnSQL
, factTableDefnSQL
, dimensionTablePopulateSQL
, factTablePopulateSQL
@ -149,7 +149,7 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- |
--
-- >>> let storySessionDimTables = extractDimensionTables env sessionFact
-- >>> let sqls = map (tableDefnSQL env) storySessionDimTables
-- >>> let sqls = map (dimensionTableDefnSQL env) storySessionDimTables
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
-- create table dim_geo (
-- id serial not null,
@ -189,8 +189,8 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- device);
-- <BLANKLINE>
-- --------
tableDefnSQL :: Env -> Table -> [Text]
tableDefnSQL env = flip runReader env . G.tableDefnSQL
dimensionTableDefnSQL :: Env -> Table -> [Text]
dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL
-- |
--
@ -222,6 +222,129 @@ tableDefnSQL env = flip runReader env . G.tableDefnSQL
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- |
--
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name,
-- city_name,
-- continent_name,
-- most_specific_subdivision_name,
-- time_zone)
-- select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name,
-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name,
-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name, type, device)
-- select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name,
-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type,
-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name,
-- city_name,
-- continent_name,
-- most_specific_subdivision_name,
-- time_zone)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name,
-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name,
-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- and
-- created_at > ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
-- and
-- dim_geo.city_name = x.geo_city_name
-- and
-- dim_geo.continent_name = x.geo_continent_name
-- and
-- dim_geo.most_specific_subdivision_name = x.geo_most_specific_subdivision_name
-- and
-- dim_geo.time_zone = x.geo_time_zone
-- where
-- dim_geo.country_name is null and dim_geo.city_name is null
-- and
-- dim_geo.continent_name is null
-- and
-- dim_geo.most_specific_subdivision_name is null
-- and
-- dim_geo.time_zone is null
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name, type, device)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name,
-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type,
-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- and
-- created_at > ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name
-- and
-- dim_user_agent.os = x.os
-- and
-- dim_user_agent.name = x.user_agent_name
-- and
-- dim_user_agent.type = x.user_agent_type
-- and
-- dim_user_agent.device = x.user_agent_device
-- where
-- dim_user_agent.browser_name is null and dim_user_agent.os is null
-- and
-- dim_user_agent.name is null
-- and
-- dim_user_agent.type is null
-- and
-- dim_user_agent.device is null
-- ;
-- <BLANKLINE>
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact

View File

@ -31,7 +31,7 @@ tableDefnSQL' Table {..} = do
tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
columnDefnSQL Column {..} =
att columnName columnType $ nullableDefnSQL columnNullable
attDef columnName columnType $ nullableDefnSQL columnNullable
nullableDefnSQL Null = NullConstraint ea ""
nullableDefnSQL NotNull = NotNullConstraint ea ""

View File

@ -5,11 +5,13 @@ module Ringo.Generator.Internal where
import qualified Data.Map as Map
import qualified Data.Text as Text
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
@ -25,12 +27,15 @@ dimColumnMapping dimPrefix fact dimTableName =
, dimPrefix <> dName == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn defaults tName Column{..} =
coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName
coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn' defaults tName Column{..} =
if columnNullable == Null
then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")"
then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName
where
fqColName = fullColumnName tName columnName
fqColName = eqi tName columnName
defVal colType =
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd

View File

@ -3,51 +3,58 @@
{-# LANGUAGE CPP #-}
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..))
import Data.Maybe (fromJust)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do
dimensionTablePopulateSQL popMode fact dimTableName =
ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName
dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateSQL' popMode fact dimTableName = do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName
selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col
| (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ]
baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols
<> "\nFROM " <> factTableName fact
baseWhereCs = [ "(\n"
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
<> "\n)"
, timeCol <> " <= ?"
]
isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
selectWhereC = Just . foldBinop "and" $
[ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++
[ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
selectC = makeSelect
{ selDistinct = Distinct
, selSelectList = sl selectCols
, selTref = [tref $ factTableName fact]
, selWhere = selectWhereC
}
insertC selectC whereCs =
"INSERT INTO "
<> suffixTableName popMode settingTableNameSuffixTemplate dimTableName
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) "
<> "SELECT x.* FROM (\n"
<> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs
<> ") x"
iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName
insertC = insert iTableName (map fst colMapping) $ case popMode of
FullPopulation -> selectC
IncrementalPopulation -> let alias = "x" in
makeSelect
{ selSelectList = sl [si $ qstar alias]
, selTref =
[ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $
foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ]
, selWhere =
Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping
}
return $ case popMode of
FullPopulation -> insertC baseSelectC baseWhereCs
IncrementalPopulation ->
insertC baseSelectC (baseWhereCs ++ [ timeCol <> " > ?" ])
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
return insertC

View File

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo.Generator.Sql where
import qualified Data.Text as Text
@ -18,9 +19,96 @@ name n = Name ea [nmc n]
nmc :: Text -> NameComponent
nmc = Nmc . Text.unpack
att :: Text -> Text -> RowConstraint -> AttributeDef
att nam typ constr =
attDef :: Text -> Text -> RowConstraint -> AttributeDef
attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member a b = BinaryOp ea (name ".") a b
num :: Text -> ScalarExpr
num n = NumberLit ea $ Text.unpack n
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
app :: Text -> [ScalarExpr] -> ScalarExpr
app n as = App ea (name n) as
specop :: Text -> [ScalarExpr] -> ScalarExpr
specop n as = SpecialOp ea (name n) as
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n a = PrefixOp ea (name n) a
postop :: Text -> ScalarExpr -> ScalarExpr
postop n a = PostfixOp ea (name n) a
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n a0 a1 = BinaryOp ea (name n) a0 a1
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
foldBinop n (a : as) = foldl (binop n) a as
placeholder :: ScalarExpr
placeholder = Placeholder ea
parens :: ScalarExpr -> ScalarExpr
parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
-- Table ref
tref :: Text -> TableRef
tref s = Tref ea (name s)
-- Table ref alias
trefa :: Text -> Text -> TableRef
trefa t a = TableAlias ea (nmc a) $ Tref ea (name t)
-- Subquery Table ref alias
subtrefa :: Text -> QueryExpr -> TableRef
subtrefa a = TableAlias ea (nmc a) . SubTref ea
-- Table join
tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef
tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on)
-- Select item
si :: ScalarExpr -> SelectItem
si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia e a = SelectItem ea e a
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c x = Identifier ea $ qn c x
-- Expression identifier
ei :: Text -> ScalarExpr
ei = Identifier ea . name
-- Qualified name
qn :: Text -> Text -> Name
qn c n = Name ea [nmc c, nmc n]
-- Select list
sl :: [SelectItem] -> SelectList
sl = SelectList ea
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
ppSQL :: Statement -> Text
ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)
ppQueryExpr :: QueryExpr -> Text
ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect)