diff --git a/app/Main.hs b/app/Main.hs index 079ee28..2fa0119 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -56,7 +56,7 @@ writeFiles outputDir env@Env{..} = do dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ] factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ] - dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ tableDefnSQL env table) + dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table) | (_, tabs) <- dimTables , table <- tabs , table `notElem` envTables ] diff --git a/src/Ringo.hs b/src/Ringo.hs index 2d8a3c2..eb41d13 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -9,7 +9,7 @@ module Ringo , extractFactTable , extractDimensionTables , extractDependencies - , tableDefnSQL + , dimensionTableDefnSQL , factTableDefnSQL , dimensionTablePopulateSQL , factTablePopulateSQL @@ -149,7 +149,7 @@ extractDependencies env = flip runReader env . E.extractDependencies -- | -- -- >>> let storySessionDimTables = extractDimensionTables env sessionFact --- >>> let sqls = map (tableDefnSQL env) storySessionDimTables +-- >>> let sqls = map (dimensionTableDefnSQL env) storySessionDimTables -- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls -- create table dim_geo ( -- id serial not null, @@ -189,8 +189,8 @@ extractDependencies env = flip runReader env . E.extractDependencies -- device); -- -- -------- -tableDefnSQL :: Env -> Table -> [Text] -tableDefnSQL env = flip runReader env . G.tableDefnSQL +dimensionTableDefnSQL :: Env -> Table -> [Text] +dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL -- | -- @@ -222,6 +222,129 @@ tableDefnSQL env = flip runReader env . G.tableDefnSQL factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact +-- | +-- +-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact +-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_geo (country_name, +-- city_name, +-- continent_name, +-- most_specific_subdivision_name, +-- time_zone) +-- select distinct +-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, +-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, +-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, +-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- from +-- session_events +-- where +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- and +-- created_at <= ? +-- ; +-- +-- insert into dim_user_agent (browser_name, os, name, type, device) +-- select distinct +-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, +-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, +-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, +-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- from +-- session_events +-- where +-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- and +-- created_at <= ? +-- ; +-- +-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_geo (country_name, +-- city_name, +-- continent_name, +-- most_specific_subdivision_name, +-- time_zone) +-- select +-- x.* +-- from +-- (select distinct +-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, +-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, +-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, +-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- from +-- session_events +-- where +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- and +-- created_at <= ? +-- and +-- created_at > ?) as x +-- left outer join +-- dim_geo +-- on dim_geo.country_name = x.geo_country_name +-- and +-- dim_geo.city_name = x.geo_city_name +-- and +-- dim_geo.continent_name = x.geo_continent_name +-- and +-- dim_geo.most_specific_subdivision_name = x.geo_most_specific_subdivision_name +-- and +-- dim_geo.time_zone = x.geo_time_zone +-- where +-- dim_geo.country_name is null and dim_geo.city_name is null +-- and +-- dim_geo.continent_name is null +-- and +-- dim_geo.most_specific_subdivision_name is null +-- and +-- dim_geo.time_zone is null +-- ; +-- +-- insert into dim_user_agent (browser_name, os, name, type, device) +-- select +-- x.* +-- from +-- (select distinct +-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, +-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, +-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, +-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- from +-- session_events +-- where +-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- and +-- created_at <= ? +-- and +-- created_at > ?) as x +-- left outer join +-- dim_user_agent +-- on dim_user_agent.browser_name = x.browser_name +-- and +-- dim_user_agent.os = x.os +-- and +-- dim_user_agent.name = x.user_agent_name +-- and +-- dim_user_agent.type = x.user_agent_type +-- and +-- dim_user_agent.device = x.user_agent_device +-- where +-- dim_user_agent.browser_name is null and dim_user_agent.os is null +-- and +-- dim_user_agent.name is null +-- and +-- dim_user_agent.type is null +-- and +-- dim_user_agent.device is null +-- ; +-- dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text dimensionTablePopulateSQL popMode env fact = flip runReader env . G.dimensionTablePopulateSQL popMode fact diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 5570f42..304b02b 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -31,7 +31,7 @@ tableDefnSQL' Table {..} = do tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing columnDefnSQL Column {..} = - att columnName columnType $ nullableDefnSQL columnNullable + attDef columnName columnType $ nullableDefnSQL columnNullable nullableDefnSQL Null = NullConstraint ea "" nullableDefnSQL NotNull = NotNullConstraint ea "" diff --git a/src/Ringo/Generator/Internal.hs b/src/Ringo/Generator/Internal.hs index 3b776f8..a2984c7 100644 --- a/src/Ringo/Generator/Internal.hs +++ b/src/Ringo/Generator/Internal.hs @@ -5,11 +5,13 @@ module Ringo.Generator.Internal where import qualified Data.Map as Map import qualified Data.Text as Text +import Database.HsSqlPpp.Syntax (ScalarExpr) import Data.List (find) import Data.Monoid ((<>)) import Data.Text (Text) import Ringo.Extractor.Internal +import Ringo.Generator.Sql import Ringo.Types joinColumnNames :: [ColumnName] -> Text @@ -25,12 +27,15 @@ dimColumnMapping dimPrefix fact dimTableName = , dimPrefix <> dName == dimTableName ] coalesceColumn :: TypeDefaults -> TableName -> Column -> Text -coalesceColumn defaults tName Column{..} = +coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName + +coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr +coalesceColumn' defaults tName Column{..} = if columnNullable == Null - then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")" + then app "coalesce" [fqColName, num $ defVal columnType] else fqColName where - fqColName = fullColumnName tName columnName + fqColName = eqi tName columnName defVal colType = maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index 03d38d5..abed632 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -3,51 +3,58 @@ {-# LANGUAGE CPP #-} module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where -import qualified Data.Text as Text +#if MIN_VERSION_base(4,8,0) +#else +import Control.Applicative ((<$>)) +#endif -import Control.Monad.Reader (Reader, asks) -import Data.Maybe (fromJust) -import Data.Monoid ((<>)) -import Data.Text (Text) +import Control.Monad.Reader (Reader, asks) +import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..)) +import Data.Maybe (fromJust) +import Data.Text (Text) import Ringo.Extractor.Internal import Ringo.Generator.Internal +import Ringo.Generator.Sql import Ringo.Types dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text -dimensionTablePopulateSQL popMode fact dimTableName = do +dimensionTablePopulateSQL popMode fact dimTableName = + ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName + +dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement +dimensionTablePopulateSQL' popMode fact dimTableName = do Settings {..} <- asks envSettings tables <- asks envTables defaults <- asks envTypeDefaults let factTable = fromJust $ findTable (factTableName fact) tables colMapping = dimColumnMapping settingDimPrefix fact dimTableName - selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName + selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col | (_, cName) <- colMapping , let col = fromJust . findColumn cName $ tableColumns factTable ] timeCol = head [ cName | DimTime cName <- factColumns fact ] - baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols - <> "\nFROM " <> factTableName fact - baseWhereCs = [ "(\n" - <> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ] - <> "\n)" - , timeCol <> " <= ?" - ] + isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping + selectWhereC = Just . foldBinop "and" $ + [ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++ + [ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ] + selectC = makeSelect + { selDistinct = Distinct + , selSelectList = sl selectCols + , selTref = [tref $ factTableName fact] + , selWhere = selectWhereC + } - insertC selectC whereCs = - "INSERT INTO " - <> suffixTableName popMode settingTableNameSuffixTemplate dimTableName - <> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) " - <> "SELECT x.* FROM (\n" - <> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs - <> ") x" + iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName + insertC = insert iTableName (map fst colMapping) $ case popMode of + FullPopulation -> selectC + IncrementalPopulation -> let alias = "x" in + makeSelect + { selSelectList = sl [si $ qstar alias] + , selTref = + [ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $ + foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ] + , selWhere = + Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping + } - return $ case popMode of - FullPopulation -> insertC baseSelectC baseWhereCs - IncrementalPopulation -> - insertC baseSelectC (baseWhereCs ++ [ timeCol <> " > ?" ]) - <> "\nLEFT JOIN " <> dimTableName <> " ON\n" - <> Text.intercalate " \nAND " - [ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2 - | (c1, c2) <- colMapping ] - <> "\nWHERE " <> Text.intercalate " \nAND " - [ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ] + return insertC diff --git a/src/Ringo/Generator/Sql.hs b/src/Ringo/Generator/Sql.hs index 2a8ed41..0bd19cf 100644 --- a/src/Ringo/Generator/Sql.hs +++ b/src/Ringo/Generator/Sql.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Ringo.Generator.Sql where import qualified Data.Text as Text @@ -18,9 +19,96 @@ name n = Name ea [nmc n] nmc :: Text -> NameComponent nmc = Nmc . Text.unpack -att :: Text -> Text -> RowConstraint -> AttributeDef -att nam typ constr = +attDef :: Text -> Text -> RowConstraint -> AttributeDef +attDef nam typ constr = AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr] +member :: ScalarExpr -> ScalarExpr -> ScalarExpr +member a b = BinaryOp ea (name ".") a b + +num :: Text -> ScalarExpr +num n = NumberLit ea $ Text.unpack n + +str :: Text -> ScalarExpr +str = StringLit ea . Text.unpack + +app :: Text -> [ScalarExpr] -> ScalarExpr +app n as = App ea (name n) as + +specop :: Text -> [ScalarExpr] -> ScalarExpr +specop n as = SpecialOp ea (name n) as + +prefop :: Text -> ScalarExpr -> ScalarExpr +prefop n a = PrefixOp ea (name n) a + +postop :: Text -> ScalarExpr -> ScalarExpr +postop n a = PostfixOp ea (name n) a + +binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr +binop n a0 a1 = BinaryOp ea (name n) a0 a1 + +foldBinop :: Text -> [ScalarExpr] -> ScalarExpr +foldBinop _ [] = error "List must be non empty" +foldBinop n (a : as) = foldl (binop n) a as + +placeholder :: ScalarExpr +placeholder = Placeholder ea + +parens :: ScalarExpr -> ScalarExpr +parens = Parens ea + +qstar :: Text -> ScalarExpr +qstar = QStar ea . nmc + +-- Table ref +tref :: Text -> TableRef +tref s = Tref ea (name s) + +-- Table ref alias +trefa :: Text -> Text -> TableRef +trefa t a = TableAlias ea (nmc a) $ Tref ea (name t) + +-- Subquery Table ref alias +subtrefa :: Text -> QueryExpr -> TableRef +subtrefa a = TableAlias ea (nmc a) . SubTref ea + +-- Table join +tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef +tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on) + +-- Select item +si :: ScalarExpr -> SelectItem +si = SelExp ea + +-- Select item alias +sia :: ScalarExpr -> NameComponent -> SelectItem +sia e a = SelectItem ea e a + +-- Expression qualified identifier +eqi :: Text -> Text -> ScalarExpr +eqi c x = Identifier ea $ qn c x + +-- Expression identifier +ei :: Text -> ScalarExpr +ei = Identifier ea . name + +-- Qualified name +qn :: Text -> Text -> Name +qn c n = Name ea [nmc c, nmc n] + +-- Select list +sl :: [SelectItem] -> SelectList +sl = SelectList ea + +insert :: Text -> [Text] -> QueryExpr -> Statement +insert tName cNames selectExp = + Insert ea (name tName) (map nmc cNames) selectExp Nothing + ppSQL :: Statement -> Text ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] + +ppScalarExpr :: ScalarExpr -> Text +ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect) + +ppQueryExpr :: QueryExpr -> Text +ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect)