Merge pull request #1 from quintype/hssqlppp

Use hssqlppp as the SQL generator
pull/1/head
Abhinav Sarkar 8 years ago
commit b58e022b0e

@ -17,6 +17,7 @@ env:
- ARGS="--resolver lts-2"
- ARGS="--resolver lts-3"
- ARGS="--resolver lts-4"
- ARGS="--resolver lts-5"
- ARGS="--resolver lts"
- ARGS="--resolver nightly"
@ -29,7 +30,7 @@ before_install:
# This line does all of the work: installs GHC if necessary, build the library,
# executables, and test suites, and runs the test suites. --no-terminal works
# around some quirks in Travis's terminal implementation.
script: stack $ARGS --no-terminal --install-ghc install
script: stack $ARGS --no-terminal --install-ghc install --test --fast
# Caching so the next build will be fast too.
cache:

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where
import qualified Data.ByteString.Lazy as BS
@ -7,7 +9,6 @@ import qualified Data.Text as Text
import Data.Aeson (encode)
import Data.Char (toLower)
import Data.List (nub)
import Data.Monoid ((<>))
import Control.Monad (forM_)
import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>), (<.>))
@ -80,4 +81,4 @@ writeFiles outputDir env@Env{..} = do
, factTablePopulateSQLs IncRefresh $ factTablePopulateSQL IncrementalPopulation
]
sqlStr s = Text.unpack $ s <> ";\n"
sqlStr = Text.unpack

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Ringo.ArgParser (ProgArgs(..), parseArgs) where
import qualified Data.Text as Text

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Ringo.InputParser (parseInput) where
import qualified Data.Text as Text
@ -50,17 +53,17 @@ instance FromJSON FactColumn where
parseJSON (Object o) = do
cType <- o .: "type"
case cType of
"dimtime" -> DimTime <$> o .: "column"
"nodimid" -> NoDimId <$> o .: "column"
"tenantid" -> TenantId <$> o .: "column"
"dimid" -> DimId <$> o .: "table" <*> o .: "column"
"dimval" -> DimVal <$> o .: "table" <*> o .: "column"
"factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column"
"factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column"
"factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column"
"factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column"
"factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column"
"factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column"
"dimtime" -> FactColumn <$> o .: "column" <*> pure DimTime
"nodimid" -> FactColumn <$> o .: "column" <*> pure NoDimId
"tenantid" -> FactColumn <$> o .: "column" <*> pure TenantId
"dimid" -> FactColumn <$> o .: "column" <*> (DimId <$> o .: "table")
"dimval" -> FactColumn <$> o .: "column" <*> (DimVal <$> o .: "table")
"factcount" -> FactColumn <$> o .: "column" <*> (FactCount <$> o .:? "sourcecolumn")
"factcountdistinct" -> FactColumn <$> o .: "column" <*> (FactCountDistinct <$> o .:? "sourcecolumn")
"factsum" -> FactColumn <$> o .: "column" <*> (FactSum <$> o .: "sourcecolumn")
"factaverage" -> FactColumn <$> o .: "column" <*> (FactAverage <$> o .: "sourcecolumn")
"factmax" -> FactColumn <$> o .: "column" <*> (FactMax <$> o .: "sourcecolumn")
"factmin" -> FactColumn <$> o .: "column" <*> (FactMin <$> o .: "sourcecolumn")
_ -> fail $ "Invalid fact column type: " ++ cType
parseJSON o = fail $ "Cannot parse fact column: " ++ show o
@ -72,7 +75,7 @@ instance FromJSON Fact where
<*> o .: "columns"
parseJSON o = fail $ "Cannot parse fact: " ++ show o
data Input = Input [Table] [Fact] TypeDefaults deriving (Eq, Show)
data Input = Input [Table] [Fact] TypeDefaults deriving (Show)
instance FromJSON Input where
parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" <*> o .: "defaults"

@ -22,6 +22,7 @@ library
Ringo.Validator,
Ringo.Extractor.Internal,
Ringo.Generator.Internal,
Ringo.Generator.Sql,
Ringo.Generator.Create,
Ringo.Generator.Populate.Dimension,
Ringo.Generator.Populate.Fact,
@ -32,9 +33,8 @@ library
mtl >=2.1 && <2.3,
raw-strings-qq >=1.0 && <1.2,
hssqlppp ==0.5.23
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
TupleSections, CPP, NamedFieldPuns
ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
-fno-warn-orphans -funbox-strict-fields -O2
default-language: Haskell2010
executable ringo
@ -53,17 +53,21 @@ executable ringo
filepath >=1.3 && <1.5,
aeson >=0.8 && <0.11,
ringo
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
TupleSections, CPP, NamedFieldPuns
ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
-fno-warn-orphans -funbox-strict-fields -O2
default-language: Haskell2010
test-suite ringo-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Spec.hs
build-depends: base
, ringo
hs-source-dirs: test, src
main-is: doctests.hs
if impl(ghc >= 7.10)
build-depends: base
, ringo
, pretty-show >=1.6 && <1.7
, doctest >=0.9 && <0.11
else
buildable: False
default-language: Haskell2010
source-repository head

@ -1,5 +1,10 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo
( module Ringo.Types
( -- | The examples in this module assume the following code has been run.
-- The :{ and :} will only work in GHCi.
-- $setup
module Ringo.Types
, extractFactTable
, extractDimensionTables
, extractDependencies
@ -19,31 +24,608 @@ import qualified Ringo.Extractor as E
import qualified Ringo.Generator as G
import qualified Ringo.Validator as V
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Ringo
-- >>> import qualified Data.Map as Map
-- >>> import qualified Data.Text as Text
-- >>> import Data.List (nub)
-- >>> import Text.Show.Pretty
-- >>> :{
--let sessionEventsTable =
-- Table { tableName = "session_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "user_agent" "character varying(1024)" Null
-- , Column "browser_name" "character varying(50)" Null
-- , Column "os" "character varying(50)" Null
-- , Column "geo_country_name" "character varying(50)" Null
-- , Column "geo_city_name" "character varying(50)" Null
-- , Column "geo_continent_name" "character varying(15)" Null
-- , Column "geo_most_specific_subdivision_name" "character varying(100)" Null
-- , Column "geo_longitude" "numeric(9,6)" Null
-- , Column "geo_latitude" "numeric(9,6)" Null
-- , Column "geo_time_zone" "character varying(20)" Null
-- , Column "geo_postal_code" "character varying(20)" Null
-- , Column "user_agent_name" "character varying(100)" Null
-- , Column "user_agent_type" "character varying(15)" Null
-- , Column "user_agent_version" "character varying(100)" Null
-- , Column "user_agent_device" "character varying(15)" Null
-- ]
-- , tableConstraints = [ PrimaryKey "id" ]
-- }
-- sessionFact =
-- Fact { factName = "session"
-- , factTableName = "session_events"
-- , factTablePersistent = True
-- , factParentNames = []
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "browser_name" $ DimVal "user_agent"
-- , FactColumn "os" $ DimVal "user_agent"
-- , FactColumn "user_agent_name" $ DimVal "user_agent"
-- , FactColumn "geo_country_name" $ DimVal "geo"
-- , FactColumn "geo_city_name" $ DimVal "geo"
-- , FactColumn "geo_continent_name" $ DimVal "geo"
-- , FactColumn "session_count" $ FactCount Nothing
-- ]
-- }
-- pageViewEventsTable =
-- Table { tableName = "page_view_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "device_tracker_id" "uuid" Null
-- , Column "session_event_id" "uuid" Null
-- , Column "page_type" "character varying(20)" NotNull
-- , Column "referrer" "character varying(1024)" Null
-- , Column "url" "character varying(1024)" Null
-- , Column "referrer_id" "integer" Null
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , ForeignKey "session_events" [ ("session_event_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- , ForeignKey "referrers" [ ("referrer_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- ]
-- }
-- pageViewFact =
-- Fact { factName = "page_view"
-- , factTableName = "page_view_events"
-- , factTablePersistent = True
-- , factParentNames = [ "session" ]
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "page_type" $ DimVal "page_type"
-- , FactColumn "referrer_id" $ DimId "referrers"
-- , FactColumn "view_count" $ FactCount Nothing
-- ]
-- }
-- referrersTable =
-- Table { tableName = "referrers"
-- , tableColumns =
-- [ Column "id" "integer" NotNull
-- , Column "publisher_id" "integer" NotNull
-- , Column "name" "character varying(100)" NotNull
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , UniqueKey ["publisher_id", "name"]
-- ]
-- }
-- tables = [sessionEventsTable, pageViewEventsTable, referrersTable]
-- facts = [sessionFact, pageViewFact]
-- typeDefaults = Map.fromList [ ("integer", "-1")
-- , ("timestamp", "'00-00-00 00:00:00'")
-- , ("character", "'__UNKNOWN_VAL__'")
-- , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid")
-- , ("boolean", "false")
-- , ("json", "'{}'::json")
-- , ("numeric", "-1")
-- , ("text", "'__UNKNOWN_VAL__'")
-- ]
-- settings = defSettings { settingTableNameSuffixTemplate = "" }
-- env = Env tables facts settings typeDefaults
-- :}
-- |
--
-- >>> print $ extractFactTable env sessionFact
-- Table fact_session_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column session_count integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id)
-- <BLANKLINE>
-- >>> print $ extractFactTable env pageViewFact
-- Table fact_page_view_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column view_count integer NOT NULL
-- Column referrer_id integer NOT NULL
-- Column page_type_id integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id)
-- <BLANKLINE>
extractFactTable :: Env -> Fact -> Table
extractFactTable env = flip runReader env . E.extractFactTable
-- |
--
-- >>> mapM_ print $ extractDimensionTables env sessionFact
-- Table dim_geo
-- Column id serial NOT NULL
-- Column country_name character varying(50) NOT NULL
-- Column city_name character varying(50) NOT NULL
-- Column continent_name character varying(15) NOT NULL
-- PrimaryKey id
-- UniqueKey (country_name, city_name, continent_name)
-- <BLANKLINE>
-- Table dim_user_agent
-- Column id serial NOT NULL
-- Column browser_name character varying(50) NOT NULL
-- Column os character varying(50) NOT NULL
-- Column name character varying(100) NOT NULL
-- PrimaryKey id
-- UniqueKey (browser_name, os, name)
-- <BLANKLINE>
-- >>> mapM_ print . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- Table dim_page_type
-- Column id serial NOT NULL
-- Column page_type character varying(20) NOT NULL
-- PrimaryKey id
-- UniqueKey (page_type)
-- <BLANKLINE>
extractDimensionTables :: Env -> Fact -> [Table]
extractDimensionTables env = flip runReader env . E.extractDimensionTables
-- |
--
-- >>> putStrLn . ppShow $ extractDependencies env sessionFact
-- fromList
-- [ ( "dim_geo" , [ "session_events" ] )
-- , ( "dim_user_agent" , [ "session_events" ] )
-- , ( "fact_session_by_minute"
-- , [ "session_events" , "dim_user_agent" , "dim_geo" ]
-- )
-- ]
-- >>> putStrLn . ppShow $ extractDependencies env pageViewFact
-- fromList
-- [ ( "dim_page_type" , [ "page_view_events" ] )
-- , ( "fact_page_view_by_minute"
-- , [ "page_view_events"
-- , "session_events"
-- , "dim_page_type"
-- , "referrers"
-- , "dim_user_agent"
-- , "dim_geo"
-- ]
-- )
-- ]
extractDependencies :: Env -> Fact -> Dependencies
extractDependencies env = flip runReader env . E.extractDependencies
-- |
--
-- >>> let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables env) $ facts
-- >>> let sqls = map (dimensionTableDefnSQL env) dimTables
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
-- create table dim_geo (
-- id serial not null,
-- country_name character varying(50) not null,
-- city_name character varying(50) not null,
-- continent_name character varying(15) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_geo add primary key (id);
-- <BLANKLINE>
-- alter table dim_geo add unique (country_name,
-- city_name,
-- continent_name);
-- <BLANKLINE>
-- create index on dim_geo (country_name)
-- ;
-- create index on dim_geo (city_name)
-- ;
-- create index on dim_geo (continent_name)
-- ;
-- --------
-- create table dim_user_agent (
-- id serial not null,
-- browser_name character varying(50) not null,
-- os character varying(50) not null,
-- name character varying(100) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_user_agent add primary key (id);
-- <BLANKLINE>
-- alter table dim_user_agent add unique (browser_name, os, name);
-- <BLANKLINE>
-- create index on dim_user_agent (browser_name)
-- ;
-- create index on dim_user_agent (os)
-- ;
-- create index on dim_user_agent (name)
-- ;
-- --------
-- create table dim_page_type (
-- id serial not null,
-- page_type character varying(20) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_page_type add primary key (id);
-- <BLANKLINE>
-- alter table dim_page_type add unique (page_type);
-- <BLANKLINE>
-- --------
dimensionTableDefnSQL :: Env -> Table -> [Text]
dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL
-- |
--
-- >>> let storySessionFactTable = extractFactTable env sessionFact
-- >>> let sqls = factTableDefnSQL env sessionFact storySessionFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_session_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- session_count integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_session_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_session_by_minute (created_at_minute_id)
-- ;
-- create index on fact_session_by_minute (publisher_id)
-- ;
-- create index on fact_session_by_minute (geo_id)
-- ;
-- create index on fact_session_by_minute (user_agent_id)
-- ;
-- >>> let pageViewFactTable = extractFactTable env pageViewFact
-- >>> let sqls = factTableDefnSQL env pageViewFact pageViewFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_page_view_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- view_count integer not null,
-- referrer_id integer not null,
-- page_type_id integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_page_view_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_page_view_by_minute (created_at_minute_id)
-- ;
-- create index on fact_page_view_by_minute (publisher_id)
-- ;
-- create index on fact_page_view_by_minute (referrer_id)
-- ;
-- create index on fact_page_view_by_minute (page_type_id)
-- ;
-- create index on fact_page_view_by_minute (geo_id)
-- ;
-- create index on fact_page_view_by_minute (user_agent_id)
-- ;
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- |
--
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
-- and
-- dim_geo.city_name = x.geo_city_name
-- and
-- dim_geo.continent_name = x.geo_continent_name
-- where
-- dim_geo.country_name is null and dim_geo.city_name is null
-- and
-- dim_geo.continent_name is null
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name
-- and
-- dim_user_agent.os = x.os
-- and
-- dim_user_agent.name = x.user_agent_name
-- where
-- dim_user_agent.browser_name is null and dim_user_agent.os is null
-- and
-- dim_user_agent.name is null
-- ;
-- <BLANKLINE>
-- >>> let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select
-- x.*
-- from
-- (select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_page_type
-- on dim_page_type.page_type = x.page_type
-- where
-- dim_page_type.page_type is null
-- ;
-- <BLANKLINE>
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact
-- |
--
-- >>> let sqls = factTablePopulateSQL FullPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL IncrementalPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ? and session_events.created_at >= ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL FullPopulation env pageViewFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_page_view_by_minute (created_at_minute_id,
-- publisher_id,
-- view_count,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- page_view_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_view_count,
-- coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id,
-- coalesce((select
-- id
-- from
-- dim_page_type as dim_page_type
-- where
-- dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- page_view_events
-- left outer join
-- session_events
-- on page_view_events.session_event_id = session_events.id
-- and
-- page_view_events.publisher_id = session_events.publisher_id
-- where
-- page_view_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_referrer_id,
-- xxff_page_type_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text]
factTablePopulateSQL popMode env =
flip runReader env . G.factTablePopulateSQL popMode
-- |
--
-- >>> concatMap (validateTable env) tables
-- []
validateTable :: Env -> Table -> [ValidationError]
validateTable env = flip runReader env . V.validateTable
-- |
--
-- >>> concatMap (validateFact env) facts
-- []
validateFact :: Env -> Fact -> [ValidationError]
validateFact env = flip runReader env . V.validateFact

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor
( extractDimensionTables
, extractAllDimensionTables
@ -30,34 +33,36 @@ extractFactTable fact = do
notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull }
notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName }
columns = concatFor (factColumns fact) $ \col -> case col of
DimTime cName ->
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
NoDimId cName -> [ notNullSourceColumnCopy cName ]
TenantId cName -> [ notNullSourceColumnCopy cName ]
FactCount _ cName -> [ Column cName countColType NotNull ]
FactSum scName cName -> [ notNullSourceColumnRename scName cName ]
FactMax scName cName -> [ notNullSourceColumnRename scName cName ]
FactMin scName cName -> [ notNullSourceColumnRename scName cName ]
FactAverage scName cName ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix)
]
FactCountDistinct _ cName -> [ Column cName "json" NotNull ]
_ -> []
columns = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime ->
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
NoDimId -> [ notNullSourceColumnCopy cName ]
TenantId -> [ notNullSourceColumnCopy cName ]
FactCount {..} -> [ Column cName countColType NotNull ]
FactCountDistinct {..} -> [ Column cName "json" NotNull ]
FactSum {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactMax {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactMin {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactAverage {..} ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, notNullSourceColumnRename factColSourceColumn (cName <> settingAvgSumColumnSuffix)
]
_ -> []
fkColumns = for allDims $ \(_, Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
colType = idColTypeToFKIdColType settingDimTableIdColumnType
fkColumns = for allDims $ \(dimFact, dimTable) ->
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName dimFact dimTable tables
colType = idColTypeToFKIdColType settingDimTableIdColumnType
in Column colName colType NotNull
ukColNames =
(++ map columnName fkColumns)
. forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName
TenantId cName -> Just cName
_ -> Nothing
. forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> Just $ timeUnitColumnName dimIdColName cName settingTimeUnit
NoDimId -> Just cName
TenantId -> Just cName
_ -> Nothing
return Table
{ tableName =
@ -75,15 +80,15 @@ extractDependencies fact = do
(factTableName fct, parentFacts fct facts)
factDimDeps =
nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct ->
( forMaybe (factColumns fct) $ \col -> case col of
DimVal table _ -> Just $ settingDimPrefix <> table
DimId table _ -> Just table
_ -> Nothing
( forMaybe (factColumns fct) $ \FactColumn {..} -> case factColType of
DimVal {..} -> Just $ settingDimPrefix <> factColTargetTable
DimId {..} -> Just factColTargetTable
_ -> Nothing
, parentFacts fct facts
)
dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact])
| DimVal table _ <- factColumns fact ]
| FactColumn {factColType = DimVal table} <- factColumns fact ]
factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps)
return $ Map.union dimDeps factDeps

@ -1,3 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor.Internal where
import qualified Data.Map as Map
@ -34,9 +39,13 @@ timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName dimIdColName colName timeUnit =
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName
factDimFKIdColumnName :: Text -> Text -> Fact -> Table -> [Table] -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimFact dimTable@Table { .. } tables =
if dimTable `elem` tables
then head [ factColTargetColumn
| FactColumn {factColType = DimId {..}, ..} <- factColumns dimFact
, factColTargetTable == tableName ]
else fromMaybe tableName (Text.stripPrefix dimPrefix tableName) <> "_" <> dimIdColName
extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factInfix factName timeUnit =
@ -56,7 +65,9 @@ extractDimensionTables fact = do
let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ]
dimsFromIds tables =
catMaybes [ findTable factColTargetTable tables
| FactColumn {factColType = DimId {..}} <- factColumns fact ]
dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) ->
@ -75,9 +86,9 @@ extractDimensionTables fact = do
. nub)
. Map.fromListWith (flip (++))
. mapMaybe (\fcol -> do
DimVal d col <- fcol
column <- findColumn col tableColumns
return (d, [ column ]))
FactColumn {factColType = DimVal {..}, ..} <- fcol
column <- findColumn factColTargetColumn tableColumns
return (factColTargetTable, [ column ]))
. map Just
. factColumns
$ fact

@ -1,78 +1,96 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
, AlterTableOperation(..), Constraint(..), Cascade(..) )
import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
tableDefnSQL :: Table -> Reader Env [Text]
tableDefnSQL Table {..} = do
tableDefnStmts :: Table -> Reader Env [Statement]
tableDefnStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
tableSQL = "CREATE TABLE " <> tabName <> " (\n"
<> (joinColumnNames . map columnDefnSQL $ tableColumns)
<> "\n)"
tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
columnDefnSQL Column {..} =
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
attDef columnName columnType $ nullableDefnSQL columnNullable
nullableDefnSQL Null = "NULL"
nullableDefnSQL NotNull = "NOT NULL"
nullableDefnSQL Null = NullConstraint ea ""
nullableDefnSQL NotNull = NotNullConstraint ea ""
constraintDefnSQL constraint =
let alterTableSQL = "ALTER TABLE ONLY " <> tabName <> " ADD "
in case constraint of
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
ForeignKey oTableName cNamePairs ->
[ alterTableSQL <> "FOREIGN KEY (" <> joinColumnNames (map fst cNamePairs) <> ") REFERENCES "
<> oTableName <> " (" <> joinColumnNames (map snd cNamePairs) <> ")" ]
UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tabName <> " (" <> joinColumnNames cNames <> ")"]
let constr = case constraint of
PrimaryKey cName -> PrimaryKeyConstraint ea "" [nmc cName]
ForeignKey oTableName cNamePairs ->
ReferenceConstraint ea "" (map (nmc . fst) cNamePairs)
(name oTableName) (map (nmc . snd) cNamePairs) Restrict Restrict
UniqueKey cNames -> UniqueConstraint ea "" $ map nmc cNames
in AlterTable ea (name tabName) $ AlterTableActions ea [AddConstraint ea constr]
return $ tableSQL : concatMap constraintDefnSQL tableConstraints
return $ tableSQL : map constraintDefnSQL tableConstraints
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppStatement <$> tableDefnStmts table
is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table
return $ ds ++ is
dimensionTableDefnSQL :: Table -> Reader Env [Text]
dimensionTableDefnSQL table@Table {..} = do
dimensionTableDefnSQL table = tableDefnSQL table dimensionTableIndexStmts
dimensionTableIndexStmts :: Table -> Reader Env [Statement]
dimensionTableIndexStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ]
nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ]
indexSQLs = [ "CREATE INDEX ON " <> tabName <> " (" <> cName <> ")"
| cName <- nonPKColNames, length nonPKColNames > 1 ]
(++ indexSQLs) <$> tableDefnSQL table
return [ CreateIndexTSQL ea (nmc "") (name tabName) [nmc cName]
| cName <- nonPKColNames, length nonPKColNames > 1 ]
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
factTableDefnSQL fact table = do
factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact)
factTableIndexStmts :: Fact -> Table -> Reader Env [Statement]
factTableIndexStmts fact table = do
Settings {..} <- asks envSettings
tables <- asks envTables
allDims <- extractAllDimensionTables fact
let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ]
let dimTimeCol = head [ cName | DimTimeV cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantIdV cName <- factColumns fact ]
tabName = tableName table <> settingTableNameSuffixTemplate
dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
factCols = forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just $ dimTimeColName cName
NoDimId cName -> Just cName
TenantId cName -> Just cName
_ -> Nothing
factCols = forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> Just [dimTimeColName cName]
NoDimId -> Just [cName]
TenantId -> Just [cName]
_ -> Nothing
dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
| (_, Table {..}) <- allDims ]
dimCols = [ [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName dimFact dimTable tables ]
| (dimFact, dimTable) <- allDims ]
indexSQLs = [ "CREATE INDEX ON " <> tableName table <> settingTableNameSuffixTemplate
<> " USING btree (" <> col <> ")"
| col <- factCols ++ dimCols ++ [ cName <> ", " <> dimTimeColName dimTimeCol
return [ CreateIndexTSQL ea (nmc "") (name tabName) (map nmc cols)
| cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol]
| cName <- maybeToList tenantIdCol ] ]
(++ indexSQLs) <$> tableDefnSQL table

@ -1,34 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Generator.Internal where
import qualified Data.Map as Map
import qualified Data.Text as Text
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
joinColumnNames = Text.intercalate ",\n"
fullColumnName :: TableName -> ColumnName -> ColumnName
fullColumnName tName cName = tName <> "." <> cName
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
[ (dimColumnName dName cName, cName)
| DimVal dName cName <- factColumns fact
, dimPrefix <> dName == dimTableName ]
[ (dimColumnName factColTargetTable factColTargetColumn, factColTargetColumn)
| FactColumn { factColType = DimVal {..}, ..} <- factColumns fact
, dimPrefix <> factColTargetTable == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn defaults tName Column{..} =
if columnNullable == Null
then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")"
then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName
where
fqColName = fullColumnName tName columnName
fqColName = eqi tName columnName
defVal colType =
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd

@ -1,55 +1,62 @@
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE PatternSynonyms #-}
import qualified Data.Text as Text
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (fromJust)
import Data.Monoid ((<>))
import Data.Text (Text)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..))
import Data.Maybe (fromJust)
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do
dimensionTablePopulateSQL popMode fact dimTableName =
ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName
dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateStmt popMode fact dimTableName = do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName
selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col
| (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ]
baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols
<> "\nFROM " <> factTableName fact
baseWhereCs = [ "(\n"
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
<> "\n)"
, timeCol <> " < ?"
]
insertC selectC whereCs =
"INSERT INTO "
<> suffixTableName popMode settingTableNameSuffixTemplate dimTableName
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) "
<> "SELECT x.* FROM (\n"
<> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs
<> ") x"
return $ case popMode of
FullPopulation -> insertC baseSelectC baseWhereCs
IncrementalPopulation ->
insertC baseSelectC (baseWhereCs ++ [ timeCol <> " >= ?" ])
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
timeCol = head ([ cName | DimTimeV cName <- factColumns fact ] :: [ColumnName])
isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
selectWhereC = Just . foldBinop "and" $
[ isNotNullC, binop "<" (ei timeCol) placeholder ] ++
[ binop ">=" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
selectC = makeSelect
{ selDistinct = Distinct
, selSelectList = sl selectCols
, selTref = [tref $ factTableName fact]
, selWhere = selectWhereC
}
iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName
insertC = insert iTableName (map fst colMapping) $ case popMode of
FullPopulation -> selectC
IncrementalPopulation -> let alias = "x" in
makeSelect
{ selSelectList = sl [si $ qstar alias]
, selTref =
[ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $
foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ]
, selWhere =
Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping
}
return insertC

@ -1,22 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
, SelectList(..), SelectItem(..), JoinType(..) )
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
@ -43,72 +54,78 @@ BEGIN
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE|]
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
{ ftpsSelectCols :: ![(Text, Text)]
, ftpsSelectTable :: !Text
, ftpsJoinClauses :: ![Text]
, ftpsWhereClauses :: ![Text]
, ftpsGroupByCols :: ![Text]
} deriving (Show, Eq)
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text]
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do
Settings {..} <- asks envSettings
tables <- asks envTables
let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact]
fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
bucketSelectCols =
[ ( "hashtext(" <> unqCol <> ") & "
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
, cName <> "_bnum"
)
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
, cName <> "_bhash"
)
]
selectSQL = toSelectSQL $
populateSelectSQL
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
}
aggSelectClause =
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
in "UPDATE " <> extFactTableName
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
<> "\nFROM ("
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
<> "\n) xyz"
<> "\n WHERE\n"
<> Text.intercalate "\nAND "
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
<> " = " <> fullColumnName "xyz" col
| col <- ftpsGroupByCols ]
LANGUAGE 'plpgsql' IMMUTABLE;
|]
factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
factCountDistinctUpdateStmts popMode fact groupByColPrefix expr = case expr of
Select {selSelectList = SelectList _ origSelectItems, ..} -> do
Settings {..} <- asks envSettings
tables <- asks envTables
let fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
FactCountDistinct {factColMaybeSourceColumn = scName} ->
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
bucketSelectCols =
[ sia (binop "&" (app "hashtext" [ unqCol ])
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
groupByCols = map ppScalarExpr selGroupBy
selectList =
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
selectStmt =
makeSelect
{ selSelectList = sl $ selectList ++ bucketSelectCols
, selTref = selTref
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause =
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in Just $ update extFactTableName
[ (cName, eqi "xyz" cName) ]
[ subtrefa "xyz"
makeSelect
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
, selTref = [ subtrefa "zyx" selectStmt ]
, selGroupBy = selGroupBy
} ] $
foldBinop "and"
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
(eqi "xyz" col)
| col <- groupByCols ]
_ -> Nothing
_ -> return []
where
bucketCount :: Double -> Integer
bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateStmts popMode fact = do
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
@ -117,116 +134,104 @@ factTablePopulateSQL popMode fact = do
fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
coalesceFKId col =
if "coalesce" `Text.isPrefixOf` col
then col
else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"
coalesceFKId ex =
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName
, "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint"
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
factColMap = concatFor (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ]
TenantId cName -> [ dimIdColumnInsertSQL cName ]
FactCount scName cName ->
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
FactSum scName cName ->
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
FactMax scName cName ->
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ]
FactMin scName cName ->
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ]
FactAverage scName cName ->
[ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColumnName fTableName scName <> ")"
, False
)
, ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColumnName fTableName scName <> ")"
, False
)
]
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
_ -> []
app' f cName = app f [ eqi fTableName cName ]
factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> [ timeUnitColumnInsertSQL cName ]
NoDimId -> [ dimIdColumnInsertSQL cName ]
TenantId -> [ dimIdColumnInsertSQL cName ]
FactCount {..} ->
[ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
FactSum {..} -> [ (cName, app' "sum" factColSourceColumn, False) ]
FactMax {..} -> [ (cName, app' "max" factColSourceColumn, False) ]
FactMin {..} -> [ (cName, app' "min" factColSourceColumn, False) ]
FactAverage {..} ->
[ ( cName <> settingAvgCountColumSuffix, app' "count" factColSourceColumn, False )
, ( cName <> settingAvgSumColumnSuffix , app' "sum" factColSourceColumn , False)
]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
dimFKIdColName =
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses =
[ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol
dimLookupWhereClauses = Just . foldBinop "and" $
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertSQL = if factTable `elem` tables -- existing dimension table
insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ fullColumnName factSourceTableName dimFKIdColName
else "SELECT " <> dimIdColName <> " FROM "
<> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName