Merge pull request #1 from quintype/hssqlppp

Use hssqlppp as the SQL generator
pull/1/head
Abhinav Sarkar 2016-02-03 17:50:32 +05:30
commit b58e022b0e
19 changed files with 1163 additions and 330 deletions

View File

@ -17,6 +17,7 @@ env:
- ARGS="--resolver lts-2"
- ARGS="--resolver lts-3"
- ARGS="--resolver lts-4"
- ARGS="--resolver lts-5"
- ARGS="--resolver lts"
- ARGS="--resolver nightly"
@ -29,7 +30,7 @@ before_install:
# This line does all of the work: installs GHC if necessary, build the library,
# executables, and test suites, and runs the test suites. --no-terminal works
# around some quirks in Travis's terminal implementation.
script: stack $ARGS --no-terminal --install-ghc install
script: stack $ARGS --no-terminal --install-ghc install --test --fast
# Caching so the next build will be fast too.
cache:

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where
import qualified Data.ByteString.Lazy as BS
@ -7,7 +9,6 @@ import qualified Data.Text as Text
import Data.Aeson (encode)
import Data.Char (toLower)
import Data.List (nub)
import Data.Monoid ((<>))
import Control.Monad (forM_)
import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>), (<.>))
@ -80,4 +81,4 @@ writeFiles outputDir env@Env{..} = do
, factTablePopulateSQLs IncRefresh $ factTablePopulateSQL IncrementalPopulation
]
sqlStr s = Text.unpack $ s <> ";\n"
sqlStr = Text.unpack

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Ringo.ArgParser (ProgArgs(..), parseArgs) where
import qualified Data.Text as Text

View File

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Ringo.InputParser (parseInput) where
import qualified Data.Text as Text
@ -50,17 +53,17 @@ instance FromJSON FactColumn where
parseJSON (Object o) = do
cType <- o .: "type"
case cType of
"dimtime" -> DimTime <$> o .: "column"
"nodimid" -> NoDimId <$> o .: "column"
"tenantid" -> TenantId <$> o .: "column"
"dimid" -> DimId <$> o .: "table" <*> o .: "column"
"dimval" -> DimVal <$> o .: "table" <*> o .: "column"
"factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column"
"factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column"
"factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column"
"factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column"
"factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column"
"factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column"
"dimtime" -> FactColumn <$> o .: "column" <*> pure DimTime
"nodimid" -> FactColumn <$> o .: "column" <*> pure NoDimId
"tenantid" -> FactColumn <$> o .: "column" <*> pure TenantId
"dimid" -> FactColumn <$> o .: "column" <*> (DimId <$> o .: "table")
"dimval" -> FactColumn <$> o .: "column" <*> (DimVal <$> o .: "table")
"factcount" -> FactColumn <$> o .: "column" <*> (FactCount <$> o .:? "sourcecolumn")
"factcountdistinct" -> FactColumn <$> o .: "column" <*> (FactCountDistinct <$> o .:? "sourcecolumn")
"factsum" -> FactColumn <$> o .: "column" <*> (FactSum <$> o .: "sourcecolumn")
"factaverage" -> FactColumn <$> o .: "column" <*> (FactAverage <$> o .: "sourcecolumn")
"factmax" -> FactColumn <$> o .: "column" <*> (FactMax <$> o .: "sourcecolumn")
"factmin" -> FactColumn <$> o .: "column" <*> (FactMin <$> o .: "sourcecolumn")
_ -> fail $ "Invalid fact column type: " ++ cType
parseJSON o = fail $ "Cannot parse fact column: " ++ show o
@ -72,7 +75,7 @@ instance FromJSON Fact where
<*> o .: "columns"
parseJSON o = fail $ "Cannot parse fact: " ++ show o
data Input = Input [Table] [Fact] TypeDefaults deriving (Eq, Show)
data Input = Input [Table] [Fact] TypeDefaults deriving (Show)
instance FromJSON Input where
parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" <*> o .: "defaults"

View File

@ -22,6 +22,7 @@ library
Ringo.Validator,
Ringo.Extractor.Internal,
Ringo.Generator.Internal,
Ringo.Generator.Sql,
Ringo.Generator.Create,
Ringo.Generator.Populate.Dimension,
Ringo.Generator.Populate.Fact,
@ -32,9 +33,8 @@ library
mtl >=2.1 && <2.3,
raw-strings-qq >=1.0 && <1.2,
hssqlppp ==0.5.23
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
TupleSections, CPP, NamedFieldPuns
ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
-fno-warn-orphans -funbox-strict-fields -O2
default-language: Haskell2010
executable ringo
@ -53,17 +53,21 @@ executable ringo
filepath >=1.3 && <1.5,
aeson >=0.8 && <0.11,
ringo
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
TupleSections, CPP, NamedFieldPuns
ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
-fno-warn-orphans -funbox-strict-fields -O2
default-language: Haskell2010
test-suite ringo-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Spec.hs
build-depends: base
, ringo
hs-source-dirs: test, src
main-is: doctests.hs
if impl(ghc >= 7.10)
build-depends: base
, ringo
, pretty-show >=1.6 && <1.7
, doctest >=0.9 && <0.11
else
buildable: False
default-language: Haskell2010
source-repository head

View File

@ -1,5 +1,10 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo
( module Ringo.Types
( -- | The examples in this module assume the following code has been run.
-- The :{ and :} will only work in GHCi.
-- $setup
module Ringo.Types
, extractFactTable
, extractDimensionTables
, extractDependencies
@ -19,31 +24,608 @@ import qualified Ringo.Extractor as E
import qualified Ringo.Generator as G
import qualified Ringo.Validator as V
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Ringo
-- >>> import qualified Data.Map as Map
-- >>> import qualified Data.Text as Text
-- >>> import Data.List (nub)
-- >>> import Text.Show.Pretty
-- >>> :{
--let sessionEventsTable =
-- Table { tableName = "session_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "user_agent" "character varying(1024)" Null
-- , Column "browser_name" "character varying(50)" Null
-- , Column "os" "character varying(50)" Null
-- , Column "geo_country_name" "character varying(50)" Null
-- , Column "geo_city_name" "character varying(50)" Null
-- , Column "geo_continent_name" "character varying(15)" Null
-- , Column "geo_most_specific_subdivision_name" "character varying(100)" Null
-- , Column "geo_longitude" "numeric(9,6)" Null
-- , Column "geo_latitude" "numeric(9,6)" Null
-- , Column "geo_time_zone" "character varying(20)" Null
-- , Column "geo_postal_code" "character varying(20)" Null
-- , Column "user_agent_name" "character varying(100)" Null
-- , Column "user_agent_type" "character varying(15)" Null
-- , Column "user_agent_version" "character varying(100)" Null
-- , Column "user_agent_device" "character varying(15)" Null
-- ]
-- , tableConstraints = [ PrimaryKey "id" ]
-- }
-- sessionFact =
-- Fact { factName = "session"
-- , factTableName = "session_events"
-- , factTablePersistent = True
-- , factParentNames = []
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "browser_name" $ DimVal "user_agent"
-- , FactColumn "os" $ DimVal "user_agent"
-- , FactColumn "user_agent_name" $ DimVal "user_agent"
-- , FactColumn "geo_country_name" $ DimVal "geo"
-- , FactColumn "geo_city_name" $ DimVal "geo"
-- , FactColumn "geo_continent_name" $ DimVal "geo"
-- , FactColumn "session_count" $ FactCount Nothing
-- ]
-- }
-- pageViewEventsTable =
-- Table { tableName = "page_view_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "device_tracker_id" "uuid" Null
-- , Column "session_event_id" "uuid" Null
-- , Column "page_type" "character varying(20)" NotNull
-- , Column "referrer" "character varying(1024)" Null
-- , Column "url" "character varying(1024)" Null
-- , Column "referrer_id" "integer" Null
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , ForeignKey "session_events" [ ("session_event_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- , ForeignKey "referrers" [ ("referrer_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- ]
-- }
-- pageViewFact =
-- Fact { factName = "page_view"
-- , factTableName = "page_view_events"
-- , factTablePersistent = True
-- , factParentNames = [ "session" ]
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "page_type" $ DimVal "page_type"
-- , FactColumn "referrer_id" $ DimId "referrers"
-- , FactColumn "view_count" $ FactCount Nothing
-- ]
-- }
-- referrersTable =
-- Table { tableName = "referrers"
-- , tableColumns =
-- [ Column "id" "integer" NotNull
-- , Column "publisher_id" "integer" NotNull
-- , Column "name" "character varying(100)" NotNull
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , UniqueKey ["publisher_id", "name"]
-- ]
-- }
-- tables = [sessionEventsTable, pageViewEventsTable, referrersTable]
-- facts = [sessionFact, pageViewFact]
-- typeDefaults = Map.fromList [ ("integer", "-1")
-- , ("timestamp", "'00-00-00 00:00:00'")
-- , ("character", "'__UNKNOWN_VAL__'")
-- , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid")
-- , ("boolean", "false")
-- , ("json", "'{}'::json")
-- , ("numeric", "-1")
-- , ("text", "'__UNKNOWN_VAL__'")
-- ]
-- settings = defSettings { settingTableNameSuffixTemplate = "" }
-- env = Env tables facts settings typeDefaults
-- :}
-- |
--
-- >>> print $ extractFactTable env sessionFact
-- Table fact_session_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column session_count integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id)
-- <BLANKLINE>
-- >>> print $ extractFactTable env pageViewFact
-- Table fact_page_view_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column view_count integer NOT NULL
-- Column referrer_id integer NOT NULL
-- Column page_type_id integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id)
-- <BLANKLINE>
extractFactTable :: Env -> Fact -> Table
extractFactTable env = flip runReader env . E.extractFactTable
-- |
--
-- >>> mapM_ print $ extractDimensionTables env sessionFact
-- Table dim_geo
-- Column id serial NOT NULL
-- Column country_name character varying(50) NOT NULL
-- Column city_name character varying(50) NOT NULL
-- Column continent_name character varying(15) NOT NULL
-- PrimaryKey id
-- UniqueKey (country_name, city_name, continent_name)
-- <BLANKLINE>
-- Table dim_user_agent
-- Column id serial NOT NULL
-- Column browser_name character varying(50) NOT NULL
-- Column os character varying(50) NOT NULL
-- Column name character varying(100) NOT NULL
-- PrimaryKey id
-- UniqueKey (browser_name, os, name)
-- <BLANKLINE>
-- >>> mapM_ print . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- Table dim_page_type
-- Column id serial NOT NULL
-- Column page_type character varying(20) NOT NULL
-- PrimaryKey id
-- UniqueKey (page_type)
-- <BLANKLINE>
extractDimensionTables :: Env -> Fact -> [Table]
extractDimensionTables env = flip runReader env . E.extractDimensionTables
-- |
--
-- >>> putStrLn . ppShow $ extractDependencies env sessionFact
-- fromList
-- [ ( "dim_geo" , [ "session_events" ] )
-- , ( "dim_user_agent" , [ "session_events" ] )
-- , ( "fact_session_by_minute"
-- , [ "session_events" , "dim_user_agent" , "dim_geo" ]
-- )
-- ]
-- >>> putStrLn . ppShow $ extractDependencies env pageViewFact
-- fromList
-- [ ( "dim_page_type" , [ "page_view_events" ] )
-- , ( "fact_page_view_by_minute"
-- , [ "page_view_events"
-- , "session_events"
-- , "dim_page_type"
-- , "referrers"
-- , "dim_user_agent"
-- , "dim_geo"
-- ]
-- )
-- ]
extractDependencies :: Env -> Fact -> Dependencies
extractDependencies env = flip runReader env . E.extractDependencies
-- |
--
-- >>> let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables env) $ facts
-- >>> let sqls = map (dimensionTableDefnSQL env) dimTables
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
-- create table dim_geo (
-- id serial not null,
-- country_name character varying(50) not null,
-- city_name character varying(50) not null,
-- continent_name character varying(15) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_geo add primary key (id);
-- <BLANKLINE>
-- alter table dim_geo add unique (country_name,
-- city_name,
-- continent_name);
-- <BLANKLINE>
-- create index on dim_geo (country_name)
-- ;
-- create index on dim_geo (city_name)
-- ;
-- create index on dim_geo (continent_name)
-- ;
-- --------
-- create table dim_user_agent (
-- id serial not null,
-- browser_name character varying(50) not null,
-- os character varying(50) not null,
-- name character varying(100) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_user_agent add primary key (id);
-- <BLANKLINE>
-- alter table dim_user_agent add unique (browser_name, os, name);
-- <BLANKLINE>
-- create index on dim_user_agent (browser_name)
-- ;
-- create index on dim_user_agent (os)
-- ;
-- create index on dim_user_agent (name)
-- ;
-- --------
-- create table dim_page_type (
-- id serial not null,
-- page_type character varying(20) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_page_type add primary key (id);
-- <BLANKLINE>
-- alter table dim_page_type add unique (page_type);
-- <BLANKLINE>
-- --------
dimensionTableDefnSQL :: Env -> Table -> [Text]
dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL
-- |
--
-- >>> let storySessionFactTable = extractFactTable env sessionFact
-- >>> let sqls = factTableDefnSQL env sessionFact storySessionFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_session_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- session_count integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_session_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_session_by_minute (created_at_minute_id)
-- ;
-- create index on fact_session_by_minute (publisher_id)
-- ;
-- create index on fact_session_by_minute (geo_id)
-- ;
-- create index on fact_session_by_minute (user_agent_id)
-- ;
-- >>> let pageViewFactTable = extractFactTable env pageViewFact
-- >>> let sqls = factTableDefnSQL env pageViewFact pageViewFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_page_view_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- view_count integer not null,
-- referrer_id integer not null,
-- page_type_id integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_page_view_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_page_view_by_minute (created_at_minute_id)
-- ;
-- create index on fact_page_view_by_minute (publisher_id)
-- ;
-- create index on fact_page_view_by_minute (referrer_id)
-- ;
-- create index on fact_page_view_by_minute (page_type_id)
-- ;
-- create index on fact_page_view_by_minute (geo_id)
-- ;
-- create index on fact_page_view_by_minute (user_agent_id)
-- ;
factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- |
--
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
-- and
-- dim_geo.city_name = x.geo_city_name
-- and
-- dim_geo.continent_name = x.geo_continent_name
-- where
-- dim_geo.country_name is null and dim_geo.city_name is null
-- and
-- dim_geo.continent_name is null
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name
-- and
-- dim_user_agent.os = x.os
-- and
-- dim_user_agent.name = x.user_agent_name
-- where
-- dim_user_agent.browser_name is null and dim_user_agent.os is null
-- and
-- dim_user_agent.name is null
-- ;
-- <BLANKLINE>
-- >>> let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select
-- x.*
-- from
-- (select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_page_type
-- on dim_page_type.page_type = x.page_type
-- where
-- dim_page_type.page_type is null
-- ;
-- <BLANKLINE>
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact
-- |
--
-- >>> let sqls = factTablePopulateSQL FullPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL IncrementalPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ? and session_events.created_at >= ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL FullPopulation env pageViewFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_page_view_by_minute (created_at_minute_id,
-- publisher_id,
-- view_count,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- page_view_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_view_count,
-- coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id,
-- coalesce((select
-- id
-- from
-- dim_page_type as dim_page_type
-- where
-- dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- page_view_events
-- left outer join
-- session_events
-- on page_view_events.session_event_id = session_events.id
-- and
-- page_view_events.publisher_id = session_events.publisher_id
-- where
-- page_view_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_referrer_id,
-- xxff_page_type_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text]
factTablePopulateSQL popMode env =
flip runReader env . G.factTablePopulateSQL popMode
-- |
--
-- >>> concatMap (validateTable env) tables
-- []
validateTable :: Env -> Table -> [ValidationError]
validateTable env = flip runReader env . V.validateTable
-- |
--
-- >>> concatMap (validateFact env) facts
-- []
validateFact :: Env -> Fact -> [ValidationError]
validateFact env = flip runReader env . V.validateFact

View File

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor
( extractDimensionTables
, extractAllDimensionTables
@ -30,34 +33,36 @@ extractFactTable fact = do
notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull }
notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName }
columns = concatFor (factColumns fact) $ \col -> case col of
DimTime cName ->
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
NoDimId cName -> [ notNullSourceColumnCopy cName ]
TenantId cName -> [ notNullSourceColumnCopy cName ]
FactCount _ cName -> [ Column cName countColType NotNull ]
FactSum scName cName -> [ notNullSourceColumnRename scName cName ]
FactMax scName cName -> [ notNullSourceColumnRename scName cName ]
FactMin scName cName -> [ notNullSourceColumnRename scName cName ]
FactAverage scName cName ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix)
]
FactCountDistinct _ cName -> [ Column cName "json" NotNull ]
_ -> []
columns = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime ->
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
NoDimId -> [ notNullSourceColumnCopy cName ]
TenantId -> [ notNullSourceColumnCopy cName ]
FactCount {..} -> [ Column cName countColType NotNull ]
FactCountDistinct {..} -> [ Column cName "json" NotNull ]
FactSum {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactMax {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactMin {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactAverage {..} ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, notNullSourceColumnRename factColSourceColumn (cName <> settingAvgSumColumnSuffix)
]
_ -> []
fkColumns = for allDims $ \(_, Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
colType = idColTypeToFKIdColType settingDimTableIdColumnType
fkColumns = for allDims $ \(dimFact, dimTable) ->
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName dimFact dimTable tables
colType = idColTypeToFKIdColType settingDimTableIdColumnType
in Column colName colType NotNull
ukColNames =
(++ map columnName fkColumns)
. forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName
TenantId cName -> Just cName
_ -> Nothing
. forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> Just $ timeUnitColumnName dimIdColName cName settingTimeUnit
NoDimId -> Just cName
TenantId -> Just cName
_ -> Nothing
return Table
{ tableName =
@ -75,15 +80,15 @@ extractDependencies fact = do
(factTableName fct, parentFacts fct facts)
factDimDeps =
nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct ->
( forMaybe (factColumns fct) $ \col -> case col of
DimVal table _ -> Just $ settingDimPrefix <> table
DimId table _ -> Just table
_ -> Nothing
( forMaybe (factColumns fct) $ \FactColumn {..} -> case factColType of
DimVal {..} -> Just $ settingDimPrefix <> factColTargetTable
DimId {..} -> Just factColTargetTable
_ -> Nothing
, parentFacts fct facts
)
dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact])
| DimVal table _ <- factColumns fact ]
| FactColumn {factColType = DimVal table} <- factColumns fact ]
factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps)
return $ Map.union dimDeps factDeps

View File

@ -1,3 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor.Internal where
import qualified Data.Map as Map
@ -34,9 +39,13 @@ timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName dimIdColName colName timeUnit =
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName
factDimFKIdColumnName :: Text -> Text -> Fact -> Table -> [Table] -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimFact dimTable@Table { .. } tables =
if dimTable `elem` tables
then head [ factColTargetColumn
| FactColumn {factColType = DimId {..}, ..} <- factColumns dimFact
, factColTargetTable == tableName ]
else fromMaybe tableName (Text.stripPrefix dimPrefix tableName) <> "_" <> dimIdColName
extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factInfix factName timeUnit =
@ -56,7 +65,9 @@ extractDimensionTables fact = do
let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ]
dimsFromIds tables =
catMaybes [ findTable factColTargetTable tables
| FactColumn {factColType = DimId {..}} <- factColumns fact ]
dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) ->
@ -75,9 +86,9 @@ extractDimensionTables fact = do
. nub)
. Map.fromListWith (flip (++))
. mapMaybe (\fcol -> do
DimVal d col <- fcol
column <- findColumn col tableColumns
return (d, [ column ]))
FactColumn {factColType = DimVal {..}, ..} <- fcol
column <- findColumn factColTargetColumn tableColumns
return (factColTargetTable, [ column ]))
. map Just
. factColumns
$ fact

View File

@ -1,78 +1,96 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
, AlterTableOperation(..), Constraint(..), Cascade(..) )
import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
tableDefnSQL :: Table -> Reader Env [Text]
tableDefnSQL Table {..} = do
tableDefnStmts :: Table -> Reader Env [Statement]
tableDefnStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
tableSQL = "CREATE TABLE " <> tabName <> " (\n"
<> (joinColumnNames . map columnDefnSQL $ tableColumns)
<> "\n)"
tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
columnDefnSQL Column {..} =
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable
attDef columnName columnType $ nullableDefnSQL columnNullable
nullableDefnSQL Null = "NULL"
nullableDefnSQL NotNull = "NOT NULL"
nullableDefnSQL Null = NullConstraint ea ""
nullableDefnSQL NotNull = NotNullConstraint ea ""
constraintDefnSQL constraint =
let alterTableSQL = "ALTER TABLE ONLY " <> tabName <> " ADD "
in case constraint of
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ]
ForeignKey oTableName cNamePairs ->
[ alterTableSQL <> "FOREIGN KEY (" <> joinColumnNames (map fst cNamePairs) <> ") REFERENCES "
<> oTableName <> " (" <> joinColumnNames (map snd cNamePairs) <> ")" ]
UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tabName <> " (" <> joinColumnNames cNames <> ")"]
let constr = case constraint of
PrimaryKey cName -> PrimaryKeyConstraint ea "" [nmc cName]
ForeignKey oTableName cNamePairs ->
ReferenceConstraint ea "" (map (nmc . fst) cNamePairs)
(name oTableName) (map (nmc . snd) cNamePairs) Restrict Restrict
UniqueKey cNames -> UniqueConstraint ea "" $ map nmc cNames
return $ tableSQL : concatMap constraintDefnSQL tableConstraints
in AlterTable ea (name tabName) $ AlterTableActions ea [AddConstraint ea constr]
return $ tableSQL : map constraintDefnSQL tableConstraints
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppStatement <$> tableDefnStmts table
is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table
return $ ds ++ is
dimensionTableDefnSQL :: Table -> Reader Env [Text]
dimensionTableDefnSQL table@Table {..} = do
dimensionTableDefnSQL table = tableDefnSQL table dimensionTableIndexStmts
dimensionTableIndexStmts :: Table -> Reader Env [Statement]
dimensionTableIndexStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ]
nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ]
indexSQLs = [ "CREATE INDEX ON " <> tabName <> " (" <> cName <> ")"
| cName <- nonPKColNames, length nonPKColNames > 1 ]
(++ indexSQLs) <$> tableDefnSQL table
return [ CreateIndexTSQL ea (nmc "") (name tabName) [nmc cName]
| cName <- nonPKColNames, length nonPKColNames > 1 ]
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
factTableDefnSQL fact table = do
factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact)
factTableIndexStmts :: Fact -> Table -> Reader Env [Statement]
factTableIndexStmts fact table = do
Settings {..} <- asks envSettings
tables <- asks envTables
allDims <- extractAllDimensionTables fact
let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ]
let dimTimeCol = head [ cName | DimTimeV cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantIdV cName <- factColumns fact ]
tabName = tableName table <> settingTableNameSuffixTemplate
dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
factCols = forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just $ dimTimeColName cName
NoDimId cName -> Just cName
TenantId cName -> Just cName
_ -> Nothing
factCols = forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> Just [dimTimeColName cName]
NoDimId -> Just [cName]
TenantId -> Just [cName]
_ -> Nothing
dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
| (_, Table {..}) <- allDims ]
dimCols = [ [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName dimFact dimTable tables ]
| (dimFact, dimTable) <- allDims ]
indexSQLs = [ "CREATE INDEX ON " <> tableName table <> settingTableNameSuffixTemplate
<> " USING btree (" <> col <> ")"
| col <- factCols ++ dimCols ++ [ cName <> ", " <> dimTimeColName dimTimeCol
return [ CreateIndexTSQL ea (nmc "") (name tabName) (map nmc cols)
| cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol]
| cName <- maybeToList tenantIdCol ] ]
(++ indexSQLs) <$> tableDefnSQL table

View File

@ -1,34 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Generator.Internal where
import qualified Data.Map as Map
import qualified Data.Text as Text
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
joinColumnNames = Text.intercalate ",\n"
fullColumnName :: TableName -> ColumnName -> ColumnName
fullColumnName tName cName = tName <> "." <> cName
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
[ (dimColumnName dName cName, cName)
| DimVal dName cName <- factColumns fact
, dimPrefix <> dName == dimTableName ]
[ (dimColumnName factColTargetTable factColTargetColumn, factColTargetColumn)
| FactColumn { factColType = DimVal {..}, ..} <- factColumns fact
, dimPrefix <> factColTargetTable == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text
coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn defaults tName Column{..} =
if columnNullable == Null
then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")"
then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName
where
fqColName = fullColumnName tName columnName
fqColName = eqi tName columnName
defVal colType =
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd

View File

@ -1,55 +1,62 @@
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE PatternSynonyms #-}
import qualified Data.Text as Text
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (fromJust)
import Data.Monoid ((<>))
import Data.Text (Text)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..))
import Data.Maybe (fromJust)
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do
dimensionTablePopulateSQL popMode fact dimTableName =
ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName
dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateStmt popMode fact dimTableName = do
Settings {..} <- asks envSettings
tables <- asks envTables
defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName
selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col
| (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ]
baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols
<> "\nFROM " <> factTableName fact
baseWhereCs = [ "(\n"
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ]
<> "\n)"
, timeCol <> " < ?"
]
timeCol = head ([ cName | DimTimeV cName <- factColumns fact ] :: [ColumnName])
isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
selectWhereC = Just . foldBinop "and" $
[ isNotNullC, binop "<" (ei timeCol) placeholder ] ++
[ binop ">=" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
selectC = makeSelect
{ selDistinct = Distinct
, selSelectList = sl selectCols
, selTref = [tref $ factTableName fact]
, selWhere = selectWhereC
}
insertC selectC whereCs =
"INSERT INTO "
<> suffixTableName popMode settingTableNameSuffixTemplate dimTableName
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) "
<> "SELECT x.* FROM (\n"
<> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs
<> ") x"
iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName
insertC = insert iTableName (map fst colMapping) $ case popMode of
FullPopulation -> selectC
IncrementalPopulation -> let alias = "x" in
makeSelect
{ selSelectList = sl [si $ qstar alias]
, selTref =
[ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $
foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ]
, selWhere =
Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping
}
return $ case popMode of
FullPopulation -> insertC baseSelectC baseWhereCs
IncrementalPopulation ->
insertC baseSelectC (baseWhereCs ++ [ timeCol <> " >= ?" ])
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]
return insertC

View File

@ -1,22 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
, SelectList(..), SelectItem(..), JoinType(..) )
import Data.List (nub)
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal
import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
@ -43,72 +54,78 @@ BEGIN
RETURN r;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE|]
LANGUAGE 'plpgsql' IMMUTABLE;
|]
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
{ ftpsSelectCols :: ![(Text, Text)]
, ftpsSelectTable :: !Text
, ftpsJoinClauses :: ![Text]
, ftpsWhereClauses :: ![Text]
, ftpsGroupByCols :: ![Text]
} deriving (Show, Eq)
factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
factCountDistinctUpdateStmts popMode fact groupByColPrefix expr = case expr of
Select {selSelectList = SelectList _ origSelectItems, ..} -> do
Settings {..} <- asks envSettings
tables <- asks envTables
let fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text]
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do
Settings {..} <- asks envSettings
tables <- asks envTables
let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact]
fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
FactCountDistinct {factColMaybeSourceColumn = scName} ->
let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
bucketSelectCols =
[ sia (binop "&" (app "hashtext" [ unqCol ])
(num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
bucketSelectCols =
[ ( "hashtext(" <> unqCol <> ") & "
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
, cName <> "_bnum"
)
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
, cName <> "_bhash"
)
]
groupByCols = map ppScalarExpr selGroupBy
selectList =
[ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
selectSQL = toSelectSQL $
populateSelectSQL
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ]
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ]
}
selectStmt =
makeSelect
{ selSelectList = sl $ selectList ++ bucketSelectCols
, selTref = selTref
, selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
, selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause =
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName
aggSelectClause =
sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in "UPDATE " <> extFactTableName
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName
<> "\nFROM ("
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause])
<> "\nFROM (\n" <> selectSQL <> "\n) zyx"
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols
<> "\n) xyz"
<> "\n WHERE\n"
<> Text.intercalate "\nAND "
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col
<> " = " <> fullColumnName "xyz" col
| col <- ftpsGroupByCols ]
in Just $ update extFactTableName
[ (cName, eqi "xyz" cName) ]
[ subtrefa "xyz"
makeSelect
{ selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
, selTref = [ subtrefa "zyx" selectStmt ]
, selGroupBy = selGroupBy
} ] $
foldBinop "and"
[ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
(eqi "xyz" col)
| col <- groupByCols ]
_ -> Nothing
_ -> return []
where
bucketCount :: Double -> Integer
bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateStmts popMode fact = do
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
@ -117,116 +134,104 @@ factTablePopulateSQL popMode fact = do
fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
coalesceFKId col =
if "coalesce" `Text.isPrefixOf` col
then col
else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"
coalesceFKId ex =
app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName
, "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint"
, cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
(num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
factColMap = concatFor (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ]
TenantId cName -> [ dimIdColumnInsertSQL cName ]
FactCount scName cName ->
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
FactSum scName cName ->
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
FactMax scName cName ->
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ]
FactMin scName cName ->
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ]
FactAverage scName cName ->
[ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColumnName fTableName scName <> ")"
, False
)
, ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColumnName fTableName scName <> ")"
, False
)
]
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
_ -> []
app' f cName = app f [ eqi fTableName cName ]
factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
case factColType of
DimTime -> [ timeUnitColumnInsertSQL cName ]
NoDimId -> [ dimIdColumnInsertSQL cName ]
TenantId -> [ dimIdColumnInsertSQL cName ]
FactCount {..} ->
[ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
FactSum {..} -> [ (cName, app' "sum" factColSourceColumn, False) ]
FactMax {..} -> [ (cName, app' "max" factColSourceColumn, False) ]
FactMin {..} -> [ (cName, app' "min" factColSourceColumn, False) ]
FactAverage {..} ->
[ ( cName <> settingAvgCountColumSuffix, app' "count" factColSourceColumn, False )
, ( cName <> settingAvgSumColumnSuffix , app' "sum" factColSourceColumn , False)
]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
dimFKIdColName =
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses =
[ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol
dimLookupWhereClauses = Just . foldBinop "and" $
[ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertSQL = if factTable `elem` tables -- existing dimension table
insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ fullColumnName factSourceTableName dimFKIdColName
else "SELECT " <> dimIdColName <> " FROM "
<> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName
<> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses
in (dimFKIdColName, coalesceFKId insertSQL, True)
$ eqi factSourceTableName dimFKIdColName
else coalesceFKId . subQueryExp $
makeSelect
{ selSelectList = sl [ si $ ei dimIdColName ]
, selTref =
[ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ]
, selWhere = dimLookupWhereClauses
}
in (dimFKIdColName, insertExpr, True)
colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy)
| (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ]
colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
| (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName)
map (tref &&& joinClausePreds fTable)
. filter (/= fTableName)
. nub
. map (factTableName . fst)
$ allDims
timeCol = fullColumnName fTableName $ head [ cName | DimTime cName <- factColumns fact ]
timeCol = eqi fTableName $ head [ cName | DimTimeV cName <- factColumns fact ]
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
populateSelectSQL =
FactTablePopulateSelectSQL
{ ftpsSelectCols = map snd3 colMap
, ftpsSelectTable = fTableName
, ftpsJoinClauses = joinClauses
, ftpsWhereClauses =
timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ]
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap
populateSelectExpr =
makeSelect
{ selSelectList = sl . map (uncurry sia . snd3) $ colMap
, selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
, selWhere = Just . foldBinop "and" $
binop "<" timeCol placeholder :
[ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
, selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
}
insertIntoSQL = "INSERT INTO " <> extFactTableName
<> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)\n"
<> toSelectSQL populateSelectSQL
insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL
return $ insertIntoSQL : updateSQLs
updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr
return $ insertIntoStmt : updateStmts
where
groupByColPrefix = "xxff_"
joinClausePreds table oTableName =
Text.intercalate " AND "
. map (\(c1, c2) -> fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2)
foldBinop "and"
. map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
, tName == oTableName ]
toSelectSQL :: FactTablePopulateSelectSQL -> Text
toSelectSQL FactTablePopulateSelectSQL {..} =
"SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols)
<> "\nFROM " <> ftpsSelectTable
<> (if not . null $ ftpsJoinClauses
then "\n" <> Text.intercalate "\n" ftpsJoinClauses
else "")
<> (if not . null $ ftpsWhereClauses
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses
else "")
<> "\nGROUP BY \n"
<> joinColumnNames ftpsGroupByCols
where
asName sql alias = "(" <> sql <> ")" <> " as " <> alias
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
factTablePopulateSQL popMode fact = do
stmts <- factTablePopulateStmts popMode fact
return $ case stmts of
[] -> []
[i] -> [ ppStatement i ]
i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us

128
src/Ringo/Generator/Sql.hs Normal file
View File

@ -0,0 +1,128 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo.Generator.Sql where
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import Database.HsSqlPpp.Annotation
import Database.HsSqlPpp.Dialect
import Database.HsSqlPpp.Pretty
import Database.HsSqlPpp.Syntax
import Data.Text (Text)
ea :: Annotation
ea = emptyAnnotation
name :: Text -> Name
name n = Name ea [nmc n]
nmc :: Text -> NameComponent
nmc = Nmc . Text.unpack
attDef :: Text -> Text -> RowConstraint -> AttributeDef
attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member = BinaryOp ea (name ".")
num :: Text -> ScalarExpr
num = NumberLit ea . Text.unpack
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
extEpoch :: ScalarExpr -> ScalarExpr
extEpoch = Extract ea ExtractEpoch
app :: Text -> [ScalarExpr] -> ScalarExpr
app n = App ea (name n)
cast :: ScalarExpr -> Text -> ScalarExpr
cast ex = Cast ea ex . SimpleTypeName ea . name
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n = PrefixOp ea (name n)
postop :: Text -> ScalarExpr -> ScalarExpr
postop n = PostfixOp ea (name n)
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n = BinaryOp ea (name n)
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
foldBinop n (a : as) = foldl (binop n) a as
placeholder :: ScalarExpr
placeholder = Placeholder ea
parens :: ScalarExpr -> ScalarExpr
parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
star :: ScalarExpr
star = Star ea
subQueryExp :: QueryExpr -> ScalarExpr
subQueryExp = ScalarSubQuery ea
-- Table ref
tref :: Text -> TableRef
tref = Tref ea . name
-- Table ref alias
trefa :: Text -> Text -> TableRef
trefa t a = TableAlias ea (nmc a) $ Tref ea (name t)
-- Subquery Table ref alias
subtrefa :: Text -> QueryExpr -> TableRef
subtrefa a = TableAlias ea (nmc a) . SubTref ea
-- Table join
tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef
tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on)
-- Select item
si :: ScalarExpr -> SelectItem
si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia = SelectItem ea
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c = Identifier ea . qn c
-- Expression identifier
ei :: Text -> ScalarExpr
ei = Identifier ea . name
-- Qualified name
qn :: Text -> Text -> Name
qn c n = Name ea [nmc c, nmc n]
-- Select list
sl :: [SelectItem] -> SelectList
sl = SelectList ea
-- Insert statement
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
-- Update statement
update :: Text -> [(Text, ScalarExpr)] -> [TableRef] -> ScalarExpr -> Statement
update tName setClauseList fromList whr =
Update ea (name tName) (map (uncurry (SetClause ea . nmc)) setClauseList) fromList (Just whr) Nothing
-- Pretty print statement
ppStatement :: Statement -> Text
ppStatement st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Pretty print scalar expression
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)

View File

@ -1,32 +1,62 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Types where
import qualified Data.Text as Text
import Data.Map (Map)
import Data.Text (Text)
import Data.Map (Map)
import Data.Monoid ((<>))
import Data.Text (Text)
showColNames :: [Text] -> String
showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")"
type ColumnName = Text
type ColumnType = Text
type TableName = Text
data Nullable = Null | NotNull deriving (Eq, Enum, Show)
data Nullable = Null | NotNull deriving (Eq, Enum)
instance Show Nullable where
show Null = "NULL"
show NotNull = "NOT NULL"
data Column = Column
{ columnName :: !ColumnName
, columnType :: !ColumnType
, columnNullable :: !Nullable
} deriving (Eq, Show)
} deriving (Eq)
instance Show Column where
show Column {..} = "Column "
++ Text.unpack columnName ++ " "
++ Text.unpack columnType ++ " "
++ show columnNullable
data TableConstraint = PrimaryKey !ColumnName
| UniqueKey ![ColumnName]
| ForeignKey !TableName ![(ColumnName, ColumnName)]
deriving (Eq, Show)
deriving (Eq)
instance Show TableConstraint where
show (PrimaryKey col) = "PrimaryKey " ++ Text.unpack col
show (UniqueKey cols) = "UniqueKey " ++ showColNames cols
show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " "
++ Text.unpack tName ++ " " ++ showColNames (map snd colMap)
data Table = Table
{ tableName :: !TableName
, tableColumns :: ![Column]
, tableConstraints :: ![TableConstraint]
} deriving (Eq, Show)
} deriving (Eq)
instance Show Table where
show Table {..} =
unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints
data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show, Read)
@ -47,33 +77,59 @@ data Fact = Fact
, factTablePersistent :: !Bool
, factParentNames :: ![TableName]
, factColumns :: ![FactColumn]
} deriving (Eq, Show)
} deriving (Show)
data FactColumn = DimTime !ColumnName
| NoDimId !ColumnName
| TenantId !ColumnName
| DimId !TableName !ColumnName
| DimVal !TableName !ColumnName
| FactCount !(Maybe ColumnName) !ColumnName
| FactSum !ColumnName !ColumnName
| FactAverage !ColumnName !ColumnName
| FactCountDistinct !(Maybe ColumnName) !ColumnName
| FactMax !ColumnName !ColumnName
| FactMin !ColumnName !ColumnName
deriving (Eq, Show)
data FCTNone
data FCTTargetTable
data FCTMaybeSourceColumn
data FCTSourceColumn
data FactColumnType a where
DimTime :: FactColumnType FCTNone
NoDimId :: FactColumnType FCTNone
TenantId :: FactColumnType FCTNone
DimId :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable
DimVal :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable
FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn
FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn
FactSum :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
deriving instance Show (FactColumnType a)
pattern DimTimeV col <- FactColumn col DimTime
pattern NoDimIdV col <- FactColumn col NoDimId
pattern TenantIdV col <- FactColumn col TenantId
pattern DimIdV col <- FactColumn col DimId {..}
pattern DimValV col <- FactColumn col DimVal {..}
pattern FactCountV col <- FactColumn col FactCount {..}
pattern FactCountDistinctV col <- FactColumn col FactCountDistinct {..}
pattern FactSumV col <- FactColumn col FactSum {..}
pattern FactAverageV col <- FactColumn col FactAverage {..}
pattern FactMaxV col <- FactColumn col FactMax {..}
pattern FactMinV col <- FactColumn col FactMin {..}
data FactColumn = forall a. FactColumn
{ factColTargetColumn :: !ColumnName
, factColType :: FactColumnType a }
deriving instance Show FactColumn
factSourceColumnName :: FactColumn -> Maybe ColumnName
factSourceColumnName (DimTime cName) = Just cName
factSourceColumnName (NoDimId cName) = Just cName
factSourceColumnName (TenantId cName) = Just cName
factSourceColumnName (DimId _ cName) = Just cName
factSourceColumnName (DimVal _ cName) = Just cName
factSourceColumnName (FactCount cName _) = cName
factSourceColumnName (FactSum cName _) = Just cName
factSourceColumnName (FactAverage cName _) = Just cName
factSourceColumnName (FactCountDistinct cName _) = cName
factSourceColumnName (FactMax cName _) = Just cName
factSourceColumnName (FactMin cName _) = Just cName
factSourceColumnName FactColumn {..} = case factColType of
DimTime -> Just factColTargetColumn
NoDimId -> Just factColTargetColumn
TenantId -> Just factColTargetColumn
DimId {..} -> Just factColTargetColumn
DimVal {..} -> Just factColTargetColumn
FactCount {..} -> factColMaybeSourceColumn
FactCountDistinct {..} -> factColMaybeSourceColumn
FactSum {..} -> Just factColSourceColumn
FactAverage {..} -> Just factColSourceColumn
FactMax {..} -> Just factColSourceColumn
FactMin {..} -> Just factColSourceColumn
data Settings = Settings
{ settingDimPrefix :: !Text
@ -127,7 +183,7 @@ data Env = Env
, envFacts :: ![Fact]
, envSettings :: !Settings
, envTypeDefaults :: !TypeDefaults
} deriving (Eq, Show)
} deriving (Show)
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
module Ringo.Utils where
import qualified Control.Arrow as Arrow

View File

@ -1,3 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Validator
( validateTable
, validateFact
@ -48,17 +54,19 @@ validateFact Fact {..} = do
parentVs <- concat <$> mapM checkFactParents factParentNames
let colVs = concatMap (checkColumn tables table) factColumns
timeVs = [ MissingTimeColumn factTableName
| null [ c | DimTime c <- factColumns ] ]
notNullVs = [ MissingNotNullConstraint factTableName c
| DimTime c <- factColumns
, let col = findColumn c (tableColumns table)
| null ([ cName | DimTimeV cName <- factColumns ] :: [ColumnName]) ]
notNullVs = [ MissingNotNullConstraint factTableName cName
| DimTimeV cName <- factColumns
, let col = findColumn cName (tableColumns table)
, isJust col
, columnNullable (fromJust col) == Null ]
typeDefaultVs =
[ MissingTypeDefault cType
| cName <- [ c | DimVal _ c <- factColumns ]
++ [ c | NoDimId c <- factColumns ]
++ [ c | TenantId c <- factColumns ]
| cName <- [ c | DimValV c <- factColumns ]
++ [ c | NoDimIdV c <- factColumns ]
++ [ c | TenantIdV c <- factColumns ]
++ [ c | DimIdV c <- factColumns ]
, let col = findColumn cName (tableColumns table)
, isJust col
, let cType = columnType $ fromJust col
@ -76,6 +84,7 @@ validateFact Fact {..} = do
maybe [] (checkTableForCol table) (factSourceColumnName factCol)
++ checkColumnTable tables factCol
checkColumnTable tables factCol = case factCol of
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
_ -> []
checkColumnTable :: [Table] -> FactColumn -> [ValidationError]
checkColumnTable tables FactColumn {..} = case factColType of
DimId {factColTargetTable = tName} -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
_ -> []

View File

@ -1,7 +1,7 @@
# For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md
# Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2)
resolver: lts-3.20
resolver: lts-5.1
# Local packages, usually specified by relative directory name
packages:

View File

@ -1,2 +0,0 @@
main :: IO ()
main = putStrLn "Test suite not yet implemented"

2
test/doctests.hs Normal file
View File

@ -0,0 +1,2 @@
import Test.DocTest
main = doctest ["-isrc", "Ringo"]