Merge pull request #1 from quintype/hssqlppp

Use hssqlppp as the SQL generator
pull/1/head
Abhinav Sarkar 2016-02-03 17:50:32 +05:30
commit b58e022b0e
19 changed files with 1163 additions and 330 deletions

View File

@ -17,6 +17,7 @@ env:
- ARGS="--resolver lts-2" - ARGS="--resolver lts-2"
- ARGS="--resolver lts-3" - ARGS="--resolver lts-3"
- ARGS="--resolver lts-4" - ARGS="--resolver lts-4"
- ARGS="--resolver lts-5"
- ARGS="--resolver lts" - ARGS="--resolver lts"
- ARGS="--resolver nightly" - ARGS="--resolver nightly"
@ -29,7 +30,7 @@ before_install:
# This line does all of the work: installs GHC if necessary, build the library, # This line does all of the work: installs GHC if necessary, build the library,
# executables, and test suites, and runs the test suites. --no-terminal works # executables, and test suites, and runs the test suites. --no-terminal works
# around some quirks in Travis's terminal implementation. # around some quirks in Travis's terminal implementation.
script: stack $ARGS --no-terminal --install-ghc install script: stack $ARGS --no-terminal --install-ghc install --test --fast
# Caching so the next build will be fast too. # Caching so the next build will be fast too.
cache: cache:

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where module Main where
import qualified Data.ByteString.Lazy as BS import qualified Data.ByteString.Lazy as BS
@ -7,7 +9,6 @@ import qualified Data.Text as Text
import Data.Aeson (encode) import Data.Aeson (encode)
import Data.Char (toLower) import Data.Char (toLower)
import Data.List (nub) import Data.List (nub)
import Data.Monoid ((<>))
import Control.Monad (forM_) import Control.Monad (forM_)
import System.Directory (createDirectoryIfMissing) import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>), (<.>)) import System.FilePath ((</>), (<.>))
@ -80,4 +81,4 @@ writeFiles outputDir env@Env{..} = do
, factTablePopulateSQLs IncRefresh $ factTablePopulateSQL IncrementalPopulation , factTablePopulateSQLs IncRefresh $ factTablePopulateSQL IncrementalPopulation
] ]
sqlStr s = Text.unpack $ s <> ";\n" sqlStr = Text.unpack

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Ringo.ArgParser (ProgArgs(..), parseArgs) where module Ringo.ArgParser (ProgArgs(..), parseArgs) where
import qualified Data.Text as Text import qualified Data.Text as Text

View File

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Ringo.InputParser (parseInput) where module Ringo.InputParser (parseInput) where
import qualified Data.Text as Text import qualified Data.Text as Text
@ -50,17 +53,17 @@ instance FromJSON FactColumn where
parseJSON (Object o) = do parseJSON (Object o) = do
cType <- o .: "type" cType <- o .: "type"
case cType of case cType of
"dimtime" -> DimTime <$> o .: "column" "dimtime" -> FactColumn <$> o .: "column" <*> pure DimTime
"nodimid" -> NoDimId <$> o .: "column" "nodimid" -> FactColumn <$> o .: "column" <*> pure NoDimId
"tenantid" -> TenantId <$> o .: "column" "tenantid" -> FactColumn <$> o .: "column" <*> pure TenantId
"dimid" -> DimId <$> o .: "table" <*> o .: "column" "dimid" -> FactColumn <$> o .: "column" <*> (DimId <$> o .: "table")
"dimval" -> DimVal <$> o .: "table" <*> o .: "column" "dimval" -> FactColumn <$> o .: "column" <*> (DimVal <$> o .: "table")
"factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column" "factcount" -> FactColumn <$> o .: "column" <*> (FactCount <$> o .:? "sourcecolumn")
"factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column" "factcountdistinct" -> FactColumn <$> o .: "column" <*> (FactCountDistinct <$> o .:? "sourcecolumn")
"factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column" "factsum" -> FactColumn <$> o .: "column" <*> (FactSum <$> o .: "sourcecolumn")
"factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column" "factaverage" -> FactColumn <$> o .: "column" <*> (FactAverage <$> o .: "sourcecolumn")
"factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column" "factmax" -> FactColumn <$> o .: "column" <*> (FactMax <$> o .: "sourcecolumn")
"factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column" "factmin" -> FactColumn <$> o .: "column" <*> (FactMin <$> o .: "sourcecolumn")
_ -> fail $ "Invalid fact column type: " ++ cType _ -> fail $ "Invalid fact column type: " ++ cType
parseJSON o = fail $ "Cannot parse fact column: " ++ show o parseJSON o = fail $ "Cannot parse fact column: " ++ show o
@ -72,7 +75,7 @@ instance FromJSON Fact where
<*> o .: "columns" <*> o .: "columns"
parseJSON o = fail $ "Cannot parse fact: " ++ show o parseJSON o = fail $ "Cannot parse fact: " ++ show o
data Input = Input [Table] [Fact] TypeDefaults deriving (Eq, Show) data Input = Input [Table] [Fact] TypeDefaults deriving (Show)
instance FromJSON Input where instance FromJSON Input where
parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" <*> o .: "defaults" parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" <*> o .: "defaults"

View File

@ -22,6 +22,7 @@ library
Ringo.Validator, Ringo.Validator,
Ringo.Extractor.Internal, Ringo.Extractor.Internal,
Ringo.Generator.Internal, Ringo.Generator.Internal,
Ringo.Generator.Sql,
Ringo.Generator.Create, Ringo.Generator.Create,
Ringo.Generator.Populate.Dimension, Ringo.Generator.Populate.Dimension,
Ringo.Generator.Populate.Fact, Ringo.Generator.Populate.Fact,
@ -32,9 +33,8 @@ library
mtl >=2.1 && <2.3, mtl >=2.1 && <2.3,
raw-strings-qq >=1.0 && <1.2, raw-strings-qq >=1.0 && <1.2,
hssqlppp ==0.5.23 hssqlppp ==0.5.23
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns, -fno-warn-orphans -funbox-strict-fields -O2
TupleSections, CPP, NamedFieldPuns
default-language: Haskell2010 default-language: Haskell2010
executable ringo executable ringo
@ -53,17 +53,21 @@ executable ringo
filepath >=1.3 && <1.5, filepath >=1.3 && <1.5,
aeson >=0.8 && <0.11, aeson >=0.8 && <0.11,
ringo ringo
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns, -fno-warn-orphans -funbox-strict-fields -O2
TupleSections, CPP, NamedFieldPuns
default-language: Haskell2010 default-language: Haskell2010
test-suite ringo-test test-suite ringo-test
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
hs-source-dirs: test hs-source-dirs: test, src
main-is: Spec.hs main-is: doctests.hs
build-depends: base if impl(ghc >= 7.10)
, ringo build-depends: base
, ringo
, pretty-show >=1.6 && <1.7
, doctest >=0.9 && <0.11
else
buildable: False
default-language: Haskell2010 default-language: Haskell2010
source-repository head source-repository head

View File

@ -1,5 +1,10 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo module Ringo
( module Ringo.Types ( -- | The examples in this module assume the following code has been run.
-- The :{ and :} will only work in GHCi.
-- $setup
module Ringo.Types
, extractFactTable , extractFactTable
, extractDimensionTables , extractDimensionTables
, extractDependencies , extractDependencies
@ -19,31 +24,608 @@ import qualified Ringo.Extractor as E
import qualified Ringo.Generator as G import qualified Ringo.Generator as G
import qualified Ringo.Validator as V import qualified Ringo.Validator as V
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Ringo
-- >>> import qualified Data.Map as Map
-- >>> import qualified Data.Text as Text
-- >>> import Data.List (nub)
-- >>> import Text.Show.Pretty
-- >>> :{
--let sessionEventsTable =
-- Table { tableName = "session_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "user_agent" "character varying(1024)" Null
-- , Column "browser_name" "character varying(50)" Null
-- , Column "os" "character varying(50)" Null
-- , Column "geo_country_name" "character varying(50)" Null
-- , Column "geo_city_name" "character varying(50)" Null
-- , Column "geo_continent_name" "character varying(15)" Null
-- , Column "geo_most_specific_subdivision_name" "character varying(100)" Null
-- , Column "geo_longitude" "numeric(9,6)" Null
-- , Column "geo_latitude" "numeric(9,6)" Null
-- , Column "geo_time_zone" "character varying(20)" Null
-- , Column "geo_postal_code" "character varying(20)" Null
-- , Column "user_agent_name" "character varying(100)" Null
-- , Column "user_agent_type" "character varying(15)" Null
-- , Column "user_agent_version" "character varying(100)" Null
-- , Column "user_agent_device" "character varying(15)" Null
-- ]
-- , tableConstraints = [ PrimaryKey "id" ]
-- }
-- sessionFact =
-- Fact { factName = "session"
-- , factTableName = "session_events"
-- , factTablePersistent = True
-- , factParentNames = []
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "browser_name" $ DimVal "user_agent"
-- , FactColumn "os" $ DimVal "user_agent"
-- , FactColumn "user_agent_name" $ DimVal "user_agent"
-- , FactColumn "geo_country_name" $ DimVal "geo"
-- , FactColumn "geo_city_name" $ DimVal "geo"
-- , FactColumn "geo_continent_name" $ DimVal "geo"
-- , FactColumn "session_count" $ FactCount Nothing
-- ]
-- }
-- pageViewEventsTable =
-- Table { tableName = "page_view_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" NotNull
-- , Column "member_id" "integer" Null
-- , Column "publisher_id" "integer" NotNull
-- , Column "device_tracker_id" "uuid" Null
-- , Column "session_event_id" "uuid" Null
-- , Column "page_type" "character varying(20)" NotNull
-- , Column "referrer" "character varying(1024)" Null
-- , Column "url" "character varying(1024)" Null
-- , Column "referrer_id" "integer" Null
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , ForeignKey "session_events" [ ("session_event_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- , ForeignKey "referrers" [ ("referrer_id", "id")
-- , ("publisher_id", "publisher_id")
-- ]
-- ]
-- }
-- pageViewFact =
-- Fact { factName = "page_view"
-- , factTableName = "page_view_events"
-- , factTablePersistent = True
-- , factParentNames = [ "session" ]
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ NoDimId
-- , FactColumn "page_type" $ DimVal "page_type"
-- , FactColumn "referrer_id" $ DimId "referrers"
-- , FactColumn "view_count" $ FactCount Nothing
-- ]
-- }
-- referrersTable =
-- Table { tableName = "referrers"
-- , tableColumns =
-- [ Column "id" "integer" NotNull
-- , Column "publisher_id" "integer" NotNull
-- , Column "name" "character varying(100)" NotNull
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , UniqueKey ["publisher_id", "name"]
-- ]
-- }
-- tables = [sessionEventsTable, pageViewEventsTable, referrersTable]
-- facts = [sessionFact, pageViewFact]
-- typeDefaults = Map.fromList [ ("integer", "-1")
-- , ("timestamp", "'00-00-00 00:00:00'")
-- , ("character", "'__UNKNOWN_VAL__'")
-- , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid")
-- , ("boolean", "false")
-- , ("json", "'{}'::json")
-- , ("numeric", "-1")
-- , ("text", "'__UNKNOWN_VAL__'")
-- ]
-- settings = defSettings { settingTableNameSuffixTemplate = "" }
-- env = Env tables facts settings typeDefaults
-- :}
-- |
--
-- >>> print $ extractFactTable env sessionFact
-- Table fact_session_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column session_count integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id)
-- <BLANKLINE>
-- >>> print $ extractFactTable env pageViewFact
-- Table fact_page_view_by_minute
-- Column created_at_minute_id bigint NOT NULL
-- Column publisher_id integer NOT NULL
-- Column view_count integer NOT NULL
-- Column referrer_id integer NOT NULL
-- Column page_type_id integer NOT NULL
-- Column geo_id integer NOT NULL
-- Column user_agent_id integer NOT NULL
-- UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id)
-- <BLANKLINE>
extractFactTable :: Env -> Fact -> Table extractFactTable :: Env -> Fact -> Table
extractFactTable env = flip runReader env . E.extractFactTable extractFactTable env = flip runReader env . E.extractFactTable
-- |
--
-- >>> mapM_ print $ extractDimensionTables env sessionFact
-- Table dim_geo
-- Column id serial NOT NULL
-- Column country_name character varying(50) NOT NULL
-- Column city_name character varying(50) NOT NULL
-- Column continent_name character varying(15) NOT NULL
-- PrimaryKey id
-- UniqueKey (country_name, city_name, continent_name)
-- <BLANKLINE>
-- Table dim_user_agent
-- Column id serial NOT NULL
-- Column browser_name character varying(50) NOT NULL
-- Column os character varying(50) NOT NULL
-- Column name character varying(100) NOT NULL
-- PrimaryKey id
-- UniqueKey (browser_name, os, name)
-- <BLANKLINE>
-- >>> mapM_ print . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- Table dim_page_type
-- Column id serial NOT NULL
-- Column page_type character varying(20) NOT NULL
-- PrimaryKey id
-- UniqueKey (page_type)
-- <BLANKLINE>
extractDimensionTables :: Env -> Fact -> [Table] extractDimensionTables :: Env -> Fact -> [Table]
extractDimensionTables env = flip runReader env . E.extractDimensionTables extractDimensionTables env = flip runReader env . E.extractDimensionTables
-- |
--
-- >>> putStrLn . ppShow $ extractDependencies env sessionFact
-- fromList
-- [ ( "dim_geo" , [ "session_events" ] )
-- , ( "dim_user_agent" , [ "session_events" ] )
-- , ( "fact_session_by_minute"
-- , [ "session_events" , "dim_user_agent" , "dim_geo" ]
-- )
-- ]
-- >>> putStrLn . ppShow $ extractDependencies env pageViewFact
-- fromList
-- [ ( "dim_page_type" , [ "page_view_events" ] )
-- , ( "fact_page_view_by_minute"
-- , [ "page_view_events"
-- , "session_events"
-- , "dim_page_type"
-- , "referrers"
-- , "dim_user_agent"
-- , "dim_geo"
-- ]
-- )
-- ]
extractDependencies :: Env -> Fact -> Dependencies extractDependencies :: Env -> Fact -> Dependencies
extractDependencies env = flip runReader env . E.extractDependencies extractDependencies env = flip runReader env . E.extractDependencies
-- |
--
-- >>> let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables env) $ facts
-- >>> let sqls = map (dimensionTableDefnSQL env) dimTables
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
-- create table dim_geo (
-- id serial not null,
-- country_name character varying(50) not null,
-- city_name character varying(50) not null,
-- continent_name character varying(15) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_geo add primary key (id);
-- <BLANKLINE>
-- alter table dim_geo add unique (country_name,
-- city_name,
-- continent_name);
-- <BLANKLINE>
-- create index on dim_geo (country_name)
-- ;
-- create index on dim_geo (city_name)
-- ;
-- create index on dim_geo (continent_name)
-- ;
-- --------
-- create table dim_user_agent (
-- id serial not null,
-- browser_name character varying(50) not null,
-- os character varying(50) not null,
-- name character varying(100) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_user_agent add primary key (id);
-- <BLANKLINE>
-- alter table dim_user_agent add unique (browser_name, os, name);
-- <BLANKLINE>
-- create index on dim_user_agent (browser_name)
-- ;
-- create index on dim_user_agent (os)
-- ;
-- create index on dim_user_agent (name)
-- ;
-- --------
-- create table dim_page_type (
-- id serial not null,
-- page_type character varying(20) not null
-- )
-- ;
-- <BLANKLINE>
-- alter table dim_page_type add primary key (id);
-- <BLANKLINE>
-- alter table dim_page_type add unique (page_type);
-- <BLANKLINE>
-- --------
dimensionTableDefnSQL :: Env -> Table -> [Text] dimensionTableDefnSQL :: Env -> Table -> [Text]
dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL
-- |
--
-- >>> let storySessionFactTable = extractFactTable env sessionFact
-- >>> let sqls = factTableDefnSQL env sessionFact storySessionFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_session_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- session_count integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_session_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_session_by_minute (created_at_minute_id)
-- ;
-- create index on fact_session_by_minute (publisher_id)
-- ;
-- create index on fact_session_by_minute (geo_id)
-- ;
-- create index on fact_session_by_minute (user_agent_id)
-- ;
-- >>> let pageViewFactTable = extractFactTable env pageViewFact
-- >>> let sqls = factTableDefnSQL env pageViewFact pageViewFactTable
-- >>> mapM_ (putStr . Text.unpack) sqls
-- create table fact_page_view_by_minute (
-- created_at_minute_id bigint not null,
-- publisher_id integer not null,
-- view_count integer not null,
-- referrer_id integer not null,
-- page_type_id integer not null,
-- geo_id integer not null,
-- user_agent_id integer not null
-- )
-- ;
-- <BLANKLINE>
-- alter table fact_page_view_by_minute add unique (created_at_minute_id,
-- publisher_id,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id);
-- <BLANKLINE>
-- create index on fact_page_view_by_minute (created_at_minute_id)
-- ;
-- create index on fact_page_view_by_minute (publisher_id)
-- ;
-- create index on fact_page_view_by_minute (referrer_id)
-- ;
-- create index on fact_page_view_by_minute (page_type_id)
-- ;
-- create index on fact_page_view_by_minute (geo_id)
-- ;
-- create index on fact_page_view_by_minute (user_agent_id)
-- ;
factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL :: Env -> Fact -> Table -> [Text]
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- |
--
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_geo (country_name, city_name, continent_name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name,
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name,
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name
-- from
-- session_events
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
-- and
-- dim_geo.city_name = x.geo_city_name
-- and
-- dim_geo.continent_name = x.geo_continent_name
-- where
-- dim_geo.country_name is null and dim_geo.city_name is null
-- and
-- dim_geo.continent_name is null
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name)
-- select
-- x.*
-- from
-- (select distinct
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name,
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os,
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name
-- from
-- session_events
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null)
-- and
-- created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name
-- and
-- dim_user_agent.os = x.os
-- and
-- dim_user_agent.name = x.user_agent_name
-- where
-- dim_user_agent.browser_name is null and dim_user_agent.os is null
-- and
-- dim_user_agent.name is null
-- ;
-- <BLANKLINE>
-- >>> let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables env pageViewFact
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env pageViewFact) pageViewDimTableNames
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into dim_page_type (page_type)
-- select
-- x.*
-- from
-- (select distinct
-- page_view_events.page_type as page_type
-- from
-- page_view_events
-- where
-- (page_type is not null) and created_at < ?
-- and
-- created_at >= ?) as x
-- left outer join
-- dim_page_type
-- on dim_page_type.page_type = x.page_type
-- where
-- dim_page_type.page_type is null
-- ;
-- <BLANKLINE>
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text
dimensionTablePopulateSQL popMode env fact = dimensionTablePopulateSQL popMode env fact =
flip runReader env . G.dimensionTablePopulateSQL popMode fact flip runReader env . G.dimensionTablePopulateSQL popMode fact
-- |
--
-- >>> let sqls = factTablePopulateSQL FullPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL IncrementalPopulation env sessionFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_session_by_minute (created_at_minute_id,
-- publisher_id,
-- session_count,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- session_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_session_count,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- session_events
-- where
-- session_events.created_at < ? and session_events.created_at >= ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
-- >>> let sqls = factTablePopulateSQL FullPopulation env pageViewFact
-- >>> mapM_ (putStr . Text.unpack) sqls
-- insert into fact_page_view_by_minute (created_at_minute_id,
-- publisher_id,
-- view_count,
-- referrer_id,
-- page_type_id,
-- geo_id,
-- user_agent_id)
-- select
-- cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id,
-- page_view_events.publisher_id as xxff_publisher_id,
-- count(*) as xxff_view_count,
-- coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id,
-- coalesce((select
-- id
-- from
-- dim_page_type as dim_page_type
-- where
-- dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id,
-- coalesce((select
-- id
-- from
-- dim_geo as dim_geo
-- where
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__')
-- and
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id,
-- coalesce((select
-- id
-- from
-- dim_user_agent as dim_user_agent
-- where
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__')
-- and
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id
-- from
-- page_view_events
-- left outer join
-- session_events
-- on page_view_events.session_event_id = session_events.id
-- and
-- page_view_events.publisher_id = session_events.publisher_id
-- where
-- page_view_events.created_at < ?
-- group by
-- xxff_created_at_minute_id,
-- xxff_publisher_id,
-- xxff_referrer_id,
-- xxff_page_type_id,
-- xxff_geo_id,
-- xxff_user_agent_id
-- ;
-- <BLANKLINE>
factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text] factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text]
factTablePopulateSQL popMode env = factTablePopulateSQL popMode env =
flip runReader env . G.factTablePopulateSQL popMode flip runReader env . G.factTablePopulateSQL popMode
-- |
--
-- >>> concatMap (validateTable env) tables
-- []
validateTable :: Env -> Table -> [ValidationError] validateTable :: Env -> Table -> [ValidationError]
validateTable env = flip runReader env . V.validateTable validateTable env = flip runReader env . V.validateTable
-- |
--
-- >>> concatMap (validateFact env) facts
-- []
validateFact :: Env -> Fact -> [ValidationError] validateFact :: Env -> Fact -> [ValidationError]
validateFact env = flip runReader env . V.validateFact validateFact env = flip runReader env . V.validateFact

View File

@ -1,3 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor module Ringo.Extractor
( extractDimensionTables ( extractDimensionTables
, extractAllDimensionTables , extractAllDimensionTables
@ -30,34 +33,36 @@ extractFactTable fact = do
notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull } notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull }
notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName } notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName }
columns = concatFor (factColumns fact) $ \col -> case col of columns = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
DimTime cName -> case factColType of
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ] DimTime ->
NoDimId cName -> [ notNullSourceColumnCopy cName ] [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
TenantId cName -> [ notNullSourceColumnCopy cName ] NoDimId -> [ notNullSourceColumnCopy cName ]
FactCount _ cName -> [ Column cName countColType NotNull ] TenantId -> [ notNullSourceColumnCopy cName ]
FactSum scName cName -> [ notNullSourceColumnRename scName cName ] FactCount {..} -> [ Column cName countColType NotNull ]
FactMax scName cName -> [ notNullSourceColumnRename scName cName ] FactCountDistinct {..} -> [ Column cName "json" NotNull ]
FactMin scName cName -> [ notNullSourceColumnRename scName cName ] FactSum {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
FactAverage scName cName -> FactMax {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull FactMin {..} -> [ notNullSourceColumnRename factColSourceColumn cName ]
, notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix) FactAverage {..} ->
] [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
FactCountDistinct _ cName -> [ Column cName "json" NotNull ] , notNullSourceColumnRename factColSourceColumn (cName <> settingAvgSumColumnSuffix)
_ -> [] ]
_ -> []
fkColumns = for allDims $ \(_, Table {..}) -> fkColumns = for allDims $ \(dimFact, dimTable) ->
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName let colName = factDimFKIdColumnName settingDimPrefix dimIdColName dimFact dimTable tables
colType = idColTypeToFKIdColType settingDimTableIdColumnType colType = idColTypeToFKIdColType settingDimTableIdColumnType
in Column colName colType NotNull in Column colName colType NotNull
ukColNames = ukColNames =
(++ map columnName fkColumns) (++ map columnName fkColumns)
. forMaybe (factColumns fact) $ \col -> case col of . forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit) case factColType of
NoDimId cName -> Just cName DimTime -> Just $ timeUnitColumnName dimIdColName cName settingTimeUnit
TenantId cName -> Just cName NoDimId -> Just cName
_ -> Nothing TenantId -> Just cName
_ -> Nothing
return Table return Table
{ tableName = { tableName =
@ -75,15 +80,15 @@ extractDependencies fact = do
(factTableName fct, parentFacts fct facts) (factTableName fct, parentFacts fct facts)
factDimDeps = factDimDeps =
nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct -> nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct ->
( forMaybe (factColumns fct) $ \col -> case col of ( forMaybe (factColumns fct) $ \FactColumn {..} -> case factColType of
DimVal table _ -> Just $ settingDimPrefix <> table DimVal {..} -> Just $ settingDimPrefix <> factColTargetTable
DimId table _ -> Just table DimId {..} -> Just factColTargetTable
_ -> Nothing _ -> Nothing
, parentFacts fct facts , parentFacts fct facts
) )
dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact]) dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact])
| DimVal table _ <- factColumns fact ] | FactColumn {factColType = DimVal table} <- factColumns fact ]
factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps) factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps)
return $ Map.union dimDeps factDeps return $ Map.union dimDeps factDeps

View File

@ -1,3 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
module Ringo.Extractor.Internal where module Ringo.Extractor.Internal where
import qualified Data.Map as Map import qualified Data.Map as Map
@ -34,9 +39,13 @@ timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName dimIdColName colName timeUnit = timeUnitColumnName dimIdColName colName timeUnit =
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName factDimFKIdColumnName :: Text -> Text -> Fact -> Table -> [Table] -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimTableName = factDimFKIdColumnName dimPrefix dimIdColName dimFact dimTable@Table { .. } tables =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName if dimTable `elem` tables
then head [ factColTargetColumn
| FactColumn {factColType = DimId {..}, ..} <- factColumns dimFact
, factColTargetTable == tableName ]
else fromMaybe tableName (Text.stripPrefix dimPrefix tableName) <> "_" <> dimIdColName
extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factInfix factName timeUnit = extractedFactTableName factPrefix factInfix factName timeUnit =
@ -56,7 +65,9 @@ extractDimensionTables fact = do
let table = fromJust . findTable (factTableName fact) $ tables let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table) return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where where
dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ] dimsFromIds tables =
catMaybes [ findTable factColTargetTable tables
| FactColumn {factColType = DimId {..}} <- factColumns fact ]
dimsFromVals Settings {..} tableColumns = dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) -> map (\(dim, cols) ->
@ -75,9 +86,9 @@ extractDimensionTables fact = do
. nub) . nub)
. Map.fromListWith (flip (++)) . Map.fromListWith (flip (++))
. mapMaybe (\fcol -> do . mapMaybe (\fcol -> do
DimVal d col <- fcol FactColumn {factColType = DimVal {..}, ..} <- fcol
column <- findColumn col tableColumns column <- findColumn factColTargetColumn tableColumns
return (d, [ column ])) return (factColTargetTable, [ column ]))
. map Just . map Just
. factColumns . factColumns
$ fact $ fact

View File

@ -1,78 +1,96 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
#if MIN_VERSION_base(4,8,0) #if MIN_VERSION_base(4,8,0)
#else #else
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
#endif #endif
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.Maybe (listToMaybe, maybeToList) import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
import Data.Monoid ((<>)) , AlterTableOperation(..), Constraint(..), Cascade(..) )
import Data.Text (Text) import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Internal import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
import Ringo.Utils import Ringo.Utils
tableDefnSQL :: Table -> Reader Env [Text] tableDefnStmts :: Table -> Reader Env [Statement]
tableDefnSQL Table {..} = do tableDefnStmts Table {..} = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate let tabName = tableName <> settingTableNameSuffixTemplate
tableSQL = "CREATE TABLE " <> tabName <> " (\n" tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
<> (joinColumnNames . map columnDefnSQL $ tableColumns)
<> "\n)"
columnDefnSQL Column {..} = columnDefnSQL Column {..} =
columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable attDef columnName columnType $ nullableDefnSQL columnNullable
nullableDefnSQL Null = "NULL" nullableDefnSQL Null = NullConstraint ea ""
nullableDefnSQL NotNull = "NOT NULL" nullableDefnSQL NotNull = NotNullConstraint ea ""
constraintDefnSQL constraint = constraintDefnSQL constraint =
let alterTableSQL = "ALTER TABLE ONLY " <> tabName <> " ADD " let constr = case constraint of
in case constraint of PrimaryKey cName -> PrimaryKeyConstraint ea "" [nmc cName]
PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ] ForeignKey oTableName cNamePairs ->
ForeignKey oTableName cNamePairs -> ReferenceConstraint ea "" (map (nmc . fst) cNamePairs)
[ alterTableSQL <> "FOREIGN KEY (" <> joinColumnNames (map fst cNamePairs) <> ") REFERENCES " (name oTableName) (map (nmc . snd) cNamePairs) Restrict Restrict
<> oTableName <> " (" <> joinColumnNames (map snd cNamePairs) <> ")" ] UniqueKey cNames -> UniqueConstraint ea "" $ map nmc cNames
UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tabName <> " (" <> joinColumnNames cNames <> ")"]
return $ tableSQL : concatMap constraintDefnSQL tableConstraints in AlterTable ea (name tabName) $ AlterTableActions ea [AddConstraint ea constr]
return $ tableSQL : map constraintDefnSQL tableConstraints
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppStatement <$> tableDefnStmts table
is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table
return $ ds ++ is
dimensionTableDefnSQL :: Table -> Reader Env [Text] dimensionTableDefnSQL :: Table -> Reader Env [Text]
dimensionTableDefnSQL table@Table {..} = do dimensionTableDefnSQL table = tableDefnSQL table dimensionTableIndexStmts
dimensionTableIndexStmts :: Table -> Reader Env [Statement]
dimensionTableIndexStmts Table {..} = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate let tabName = tableName <> settingTableNameSuffixTemplate
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ] tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ]
nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ] nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ]
indexSQLs = [ "CREATE INDEX ON " <> tabName <> " (" <> cName <> ")"
| cName <- nonPKColNames, length nonPKColNames > 1 ] return [ CreateIndexTSQL ea (nmc "") (name tabName) [nmc cName]
(++ indexSQLs) <$> tableDefnSQL table | cName <- nonPKColNames, length nonPKColNames > 1 ]
factTableDefnSQL :: Fact -> Table -> Reader Env [Text] factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
factTableDefnSQL fact table = do factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact)
factTableIndexStmts :: Fact -> Table -> Reader Env [Statement]
factTableIndexStmts fact table = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
tables <- asks envTables
allDims <- extractAllDimensionTables fact allDims <- extractAllDimensionTables fact
let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ] let dimTimeCol = head [ cName | DimTimeV cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ] tenantIdCol = listToMaybe [ cName | TenantIdV cName <- factColumns fact ]
tabName = tableName table <> settingTableNameSuffixTemplate
dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
factCols = forMaybe (factColumns fact) $ \col -> case col of factCols = forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
DimTime cName -> Just $ dimTimeColName cName case factColType of
NoDimId cName -> Just cName DimTime -> Just [dimTimeColName cName]
TenantId cName -> Just cName NoDimId -> Just [cName]
_ -> Nothing TenantId -> Just [cName]
_ -> Nothing
dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName dimCols = [ [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName dimFact dimTable tables ]
| (_, Table {..}) <- allDims ] | (dimFact, dimTable) <- allDims ]
indexSQLs = [ "CREATE INDEX ON " <> tableName table <> settingTableNameSuffixTemplate return [ CreateIndexTSQL ea (nmc "") (name tabName) (map nmc cols)
<> " USING btree (" <> col <> ")" | cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol]
| col <- factCols ++ dimCols ++ [ cName <> ", " <> dimTimeColName dimTimeCol
| cName <- maybeToList tenantIdCol ] ] | cName <- maybeToList tenantIdCol ] ]
(++ indexSQLs) <$> tableDefnSQL table

View File

@ -1,34 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
module Ringo.Generator.Internal where module Ringo.Generator.Internal where
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import Data.List (find) import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.Monoid ((<>)) import Data.List (find)
import Data.Text (Text) import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
joinColumnNames :: [ColumnName] -> Text
joinColumnNames = Text.intercalate ",\n"
fullColumnName :: TableName -> ColumnName -> ColumnName
fullColumnName tName cName = tName <> "." <> cName
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)] dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName = dimColumnMapping dimPrefix fact dimTableName =
[ (dimColumnName dName cName, cName) [ (dimColumnName factColTargetTable factColTargetColumn, factColTargetColumn)
| DimVal dName cName <- factColumns fact | FactColumn { factColType = DimVal {..}, ..} <- factColumns fact
, dimPrefix <> dName == dimTableName ] , dimPrefix <> factColTargetTable == dimTableName ]
coalesceColumn :: TypeDefaults -> TableName -> Column -> Text coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr
coalesceColumn defaults tName Column{..} = coalesceColumn defaults tName Column{..} =
if columnNullable == Null if columnNullable == Null
then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")" then app "coalesce" [fqColName, num $ defVal columnType]
else fqColName else fqColName
where where
fqColName = fullColumnName tName columnName fqColName = eqi tName columnName
defVal colType = defVal colType =
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd

View File

@ -1,55 +1,62 @@
module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE PatternSynonyms #-}
import qualified Data.Text as Text module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where
#if MIN_VERSION_base(4,8,0) #if MIN_VERSION_base(4,8,0)
#else #else
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
#endif #endif
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.Maybe (fromJust) import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..))
import Data.Monoid ((<>)) import Data.Maybe (fromJust)
import Data.Text (Text) import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Internal import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text
dimensionTablePopulateSQL popMode fact dimTableName = do dimensionTablePopulateSQL popMode fact dimTableName =
ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName
dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement
dimensionTablePopulateStmt popMode fact dimTableName = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
tables <- asks envTables tables <- asks envTables
defaults <- asks envTypeDefaults defaults <- asks envTypeDefaults
let factTable = fromJust $ findTable (factTableName fact) tables let factTable = fromJust $ findTable (factTableName fact) tables
colMapping = dimColumnMapping settingDimPrefix fact dimTableName colMapping = dimColumnMapping settingDimPrefix fact dimTableName
selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col
| (_, cName) <- colMapping | (_, cName) <- colMapping
, let col = fromJust . findColumn cName $ tableColumns factTable ] , let col = fromJust . findColumn cName $ tableColumns factTable ]
timeCol = head [ cName | DimTime cName <- factColumns fact ] timeCol = head ([ cName | DimTimeV cName <- factColumns fact ] :: [ColumnName])
baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
<> "\nFROM " <> factTableName fact selectWhereC = Just . foldBinop "and" $
baseWhereCs = [ "(\n" [ isNotNullC, binop "<" (ei timeCol) placeholder ] ++
<> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ] [ binop ">=" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
<> "\n)" selectC = makeSelect
, timeCol <> " < ?" { selDistinct = Distinct
] , selSelectList = sl selectCols
, selTref = [tref $ factTableName fact]
, selWhere = selectWhereC
}
insertC selectC whereCs = iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName
"INSERT INTO " insertC = insert iTableName (map fst colMapping) $ case popMode of
<> suffixTableName popMode settingTableNameSuffixTemplate dimTableName FullPopulation -> selectC
<> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) " IncrementalPopulation -> let alias = "x" in
<> "SELECT x.* FROM (\n" makeSelect
<> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs { selSelectList = sl [si $ qstar alias]
<> ") x" , selTref =
[ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $
foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ]
, selWhere =
Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping
}
return $ case popMode of return insertC
FullPopulation -> insertC baseSelectC baseWhereCs
IncrementalPopulation ->
insertC baseSelectC (baseWhereCs ++ [ timeCol <> " >= ?" ])
<> "\nLEFT JOIN " <> dimTableName <> " ON\n"
<> Text.intercalate " \nAND "
[ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2
| (c1, c2) <- colMapping ]
<> "\nWHERE " <> Text.intercalate " \nAND "
[ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ]

View File

@ -1,22 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
import qualified Data.Text as Text import qualified Data.Text as Text
#if MIN_VERSION_base(4,8,0) #if MIN_VERSION_base(4,8,0)
#else #else
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
#endif #endif
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.List (nub) import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe) , SelectList(..), SelectItem(..), JoinType(..) )
import Data.Monoid ((<>)) import Data.List (nub)
import Data.Text (Text) import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Text.RawString.QQ (r) import Data.Monoid ((<>))
import Data.Text (Text)
import Text.RawString.QQ (r)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Generator.Internal import Ringo.Generator.Internal
import Ringo.Generator.Sql
import Ringo.Types import Ringo.Types
import Ringo.Utils import Ringo.Utils
@ -43,72 +54,78 @@ BEGIN
RETURN r; RETURN r;
END; END;
$$ $$
LANGUAGE 'plpgsql' IMMUTABLE|] LANGUAGE 'plpgsql' IMMUTABLE;
|]
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement]
{ ftpsSelectCols :: ![(Text, Text)] factCountDistinctUpdateStmts popMode fact groupByColPrefix expr = case expr of
, ftpsSelectTable :: !Text Select {selSelectList = SelectList _ origSelectItems, ..} -> do
, ftpsJoinClauses :: ![Text] Settings {..} <- asks envSettings
, ftpsWhereClauses :: ![Text] tables <- asks envTables
, ftpsGroupByCols :: ![Text] let fTableName = factTableName fact
} deriving (Show, Eq) fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text] return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do case factColType of
Settings {..} <- asks envSettings FactCountDistinct {factColMaybeSourceColumn = scName} ->
tables <- asks envTables let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text"
let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact]
fTableName = factTableName fact
fTable = fromJust . findTable fTableName $ tables
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ]
extFactTableName =
suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
return . (\xs -> if null xs then xs else ilog2FunctionString : xs) bucketSelectCols =
$ for countDistinctCols $ \(FactCountDistinct scName cName) -> [ sia (binop "&" (app "hashtext" [ unqCol ])
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text" (num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1))
(nmc $ cName <> "_bnum")
, sia (binop "-"
(num "31")
(app "ilog2"
[ app "min" [ binop "&"
(app "hashtext" [ unqCol ])
(prefop "~" (parens (binop "<<" (num "1") (num "31"))))]]))
(nmc $ cName <> "_bhash")
]
bucketSelectCols = groupByCols = map ppScalarExpr selGroupBy
[ ( "hashtext(" <> unqCol <> ") & " selectList =
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1) [ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ]
, cName <> "_bnum"
)
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
, cName <> "_bhash"
)
]
selectSQL = toSelectSQL $ selectStmt =
populateSelectSQL makeSelect
{ ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols { selSelectList = sl $ selectList ++ bucketSelectCols
, ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ] , selTref = selTref
, ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ] , selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere
} , selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ]
}
aggSelectClause = aggSelectClause =
"json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName)
in "UPDATE " <> extFactTableName in Just $ update extFactTableName
<> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName [ (cName, eqi "xyz" cName) ]
<> "\nFROM (" [ subtrefa "xyz"
<> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause]) makeSelect
<> "\nFROM (\n" <> selectSQL <> "\n) zyx" { selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ]
<> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols , selTref = [ subtrefa "zyx" selectStmt ]
<> "\n) xyz" , selGroupBy = selGroupBy
<> "\n WHERE\n" } ] $
<> Text.intercalate "\nAND " foldBinop "and"
[ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col [ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col)
<> " = " <> fullColumnName "xyz" col (eqi "xyz" col)
| col <- ftpsGroupByCols ] | col <- groupByCols ]
_ -> Nothing
_ -> return []
where where
bucketCount :: Double -> Integer bucketCount :: Double -> Integer
bucketCount errorRate = bucketCount errorRate =
let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer) let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer)
in ceiling $ 2 ** power in ceiling $ 2 ** power
factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text] factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement]
factTablePopulateSQL popMode fact = do factTablePopulateStmts popMode fact = do
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact allDims <- extractAllDimensionTables fact
tables <- asks envTables tables <- asks envTables
@ -117,116 +134,104 @@ factTablePopulateSQL popMode fact = do
fTable = fromJust . findTable fTableName $ tables fTable = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName dimIdColName = settingDimTableIdColumnName
coalesceFKId col = coalesceFKId ex =
if "coalesce" `Text.isPrefixOf` col app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ]
then col
else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")"
timeUnitColumnInsertSQL cName = timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in ( colName in ( colName
, "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/" , cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName))
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint" (num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ])
"bigint"
, True , True
) )
dimIdColumnInsertSQL cName = dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True) in (cName, coalesceColumn defaults fTableName sCol, True)
factColMap = concatFor (factColumns fact) $ \col -> case col of app' f cName = app f [ eqi fTableName cName ]
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ] factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} ->
TenantId cName -> [ dimIdColumnInsertSQL cName ] case factColType of
FactCount scName cName -> DimTime -> [ timeUnitColumnInsertSQL cName ]
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ] NoDimId -> [ dimIdColumnInsertSQL cName ]
FactSum scName cName -> TenantId -> [ dimIdColumnInsertSQL cName ]
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ] FactCount {..} ->
FactMax scName cName -> [ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ]
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ] FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ]
FactMin scName cName -> FactSum {..} -> [ (cName, app' "sum" factColSourceColumn, False) ]
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ] FactMax {..} -> [ (cName, app' "max" factColSourceColumn, False) ]
FactAverage scName cName -> FactMin {..} -> [ (cName, app' "min" factColSourceColumn, False) ]
[ ( cName <> settingAvgCountColumSuffix FactAverage {..} ->
, "count(" <> fullColumnName fTableName scName <> ")" [ ( cName <> settingAvgCountColumSuffix, app' "count" factColSourceColumn, False )
, False , ( cName <> settingAvgSumColumnSuffix , app' "sum" factColSourceColumn , False)
) ]
, ( cName <> settingAvgSumColumnSuffix _ -> []
, "sum(" <> fullColumnName fTableName scName <> ")"
, False
)
]
FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)]
_ -> []
dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let
dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName dimFKIdColName =
factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables
factSourceTableName = factTableName dimFact factSourceTableName = factTableName dimFact
factSourceTable = fromJust . findTable factSourceTableName $ tables factSourceTable = fromJust . findTable factSourceTableName $ tables
dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable
dimLookupWhereClauses = dimLookupWhereClauses = Just . foldBinop "and" $
[ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol [ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol)
| (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName | (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName
, let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ] , let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ]
insertSQL = if factTable `elem` tables -- existing dimension table insertExpr = if factTable `elem` tables -- existing dimension table
then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id) then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id)
$ fullColumnName factSourceTableName dimFKIdColName $ eqi factSourceTableName dimFKIdColName
else "SELECT " <> dimIdColName <> " FROM " else coalesceFKId . subQueryExp $
<> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName makeSelect
<> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses { selSelectList = sl [ si $ ei dimIdColName ]
in (dimFKIdColName, coalesceFKId insertSQL, True) , selTref =
[ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ]
, selWhere = dimLookupWhereClauses
}
in (dimFKIdColName, insertExpr, True)
colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy) colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy)
| (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ] | (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ]
joinClauses = joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName) map (tref &&& joinClausePreds fTable)
. filter (/= fTableName)
. nub . nub
. map (factTableName . fst) . map (factTableName . fst)
$ allDims $ allDims
timeCol = fullColumnName fTableName $ head [ cName | DimTime cName <- factColumns fact ] timeCol = eqi fTableName $ head [ cName | DimTimeV cName <- factColumns fact ]
extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate
$ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
populateSelectSQL = populateSelectExpr =
FactTablePopulateSelectSQL makeSelect
{ ftpsSelectCols = map snd3 colMap { selSelectList = sl . map (uncurry sia . snd3) $ colMap
, ftpsSelectTable = fTableName , selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ]
, ftpsJoinClauses = joinClauses , selWhere = Just . foldBinop "and" $
, ftpsWhereClauses = binop "<" timeCol placeholder :
timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ] [ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ]
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap , selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap
} }
insertIntoSQL = "INSERT INTO " <> extFactTableName insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr
<> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)\n"
<> toSelectSQL populateSelectSQL
updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr
return $ insertIntoStmt : updateStmts
return $ insertIntoSQL : updateSQLs
where where
groupByColPrefix = "xxff_" groupByColPrefix = "xxff_"
joinClausePreds table oTableName = joinClausePreds table oTableName =
Text.intercalate " AND " foldBinop "and"
. map (\(c1, c2) -> fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2) . map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2))
<$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table <$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table
, tName == oTableName ] , tName == oTableName ]
toSelectSQL :: FactTablePopulateSelectSQL -> Text factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text]
toSelectSQL FactTablePopulateSelectSQL {..} = factTablePopulateSQL popMode fact = do
"SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols) stmts <- factTablePopulateStmts popMode fact
<> "\nFROM " <> ftpsSelectTable return $ case stmts of
<> (if not . null $ ftpsJoinClauses [] -> []
then "\n" <> Text.intercalate "\n" ftpsJoinClauses [i] -> [ ppStatement i ]
else "") i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us
<> (if not . null $ ftpsWhereClauses
then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses
else "")
<> "\nGROUP BY \n"
<> joinColumnNames ftpsGroupByCols
where
asName sql alias = "(" <> sql <> ")" <> " as " <> alias

128
src/Ringo/Generator/Sql.hs Normal file
View File

@ -0,0 +1,128 @@
{-# LANGUAGE OverloadedStrings #-}
module Ringo.Generator.Sql where
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import Database.HsSqlPpp.Annotation
import Database.HsSqlPpp.Dialect
import Database.HsSqlPpp.Pretty
import Database.HsSqlPpp.Syntax
import Data.Text (Text)
ea :: Annotation
ea = emptyAnnotation
name :: Text -> Name
name n = Name ea [nmc n]
nmc :: Text -> NameComponent
nmc = Nmc . Text.unpack
attDef :: Text -> Text -> RowConstraint -> AttributeDef
attDef nam typ constr =
AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr]
member :: ScalarExpr -> ScalarExpr -> ScalarExpr
member = BinaryOp ea (name ".")
num :: Text -> ScalarExpr
num = NumberLit ea . Text.unpack
str :: Text -> ScalarExpr
str = StringLit ea . Text.unpack
extEpoch :: ScalarExpr -> ScalarExpr
extEpoch = Extract ea ExtractEpoch
app :: Text -> [ScalarExpr] -> ScalarExpr
app n = App ea (name n)
cast :: ScalarExpr -> Text -> ScalarExpr
cast ex = Cast ea ex . SimpleTypeName ea . name
prefop :: Text -> ScalarExpr -> ScalarExpr
prefop n = PrefixOp ea (name n)
postop :: Text -> ScalarExpr -> ScalarExpr
postop n = PostfixOp ea (name n)
binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr
binop n = BinaryOp ea (name n)
foldBinop :: Text -> [ScalarExpr] -> ScalarExpr
foldBinop _ [] = error "List must be non empty"
foldBinop n (a : as) = foldl (binop n) a as
placeholder :: ScalarExpr
placeholder = Placeholder ea
parens :: ScalarExpr -> ScalarExpr
parens = Parens ea
qstar :: Text -> ScalarExpr
qstar = QStar ea . nmc
star :: ScalarExpr
star = Star ea
subQueryExp :: QueryExpr -> ScalarExpr
subQueryExp = ScalarSubQuery ea
-- Table ref
tref :: Text -> TableRef
tref = Tref ea . name
-- Table ref alias
trefa :: Text -> Text -> TableRef
trefa t a = TableAlias ea (nmc a) $ Tref ea (name t)
-- Subquery Table ref alias
subtrefa :: Text -> QueryExpr -> TableRef
subtrefa a = TableAlias ea (nmc a) . SubTref ea
-- Table join
tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef
tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on)
-- Select item
si :: ScalarExpr -> SelectItem
si = SelExp ea
-- Select item alias
sia :: ScalarExpr -> NameComponent -> SelectItem
sia = SelectItem ea
-- Expression qualified identifier
eqi :: Text -> Text -> ScalarExpr
eqi c = Identifier ea . qn c
-- Expression identifier
ei :: Text -> ScalarExpr
ei = Identifier ea . name
-- Qualified name
qn :: Text -> Text -> Name
qn c n = Name ea [nmc c, nmc n]
-- Select list
sl :: [SelectItem] -> SelectList
sl = SelectList ea
-- Insert statement
insert :: Text -> [Text] -> QueryExpr -> Statement
insert tName cNames selectExp =
Insert ea (name tName) (map nmc cNames) selectExp Nothing
-- Update statement
update :: Text -> [(Text, ScalarExpr)] -> [TableRef] -> ScalarExpr -> Statement
update tName setClauseList fromList whr =
Update ea (name tName) (map (uncurry (SetClause ea . nmc)) setClauseList) fromList (Just whr) Nothing
-- Pretty print statement
ppStatement :: Statement -> Text
ppStatement st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st]
-- Pretty print scalar expression
ppScalarExpr :: ScalarExpr -> Text
ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect)

View File

@ -1,32 +1,62 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Types where module Ringo.Types where
import qualified Data.Text as Text import qualified Data.Text as Text
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Monoid ((<>))
import Data.Text (Text)
showColNames :: [Text] -> String
showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")"
type ColumnName = Text type ColumnName = Text
type ColumnType = Text type ColumnType = Text
type TableName = Text type TableName = Text
data Nullable = Null | NotNull deriving (Eq, Enum, Show) data Nullable = Null | NotNull deriving (Eq, Enum)
instance Show Nullable where
show Null = "NULL"
show NotNull = "NOT NULL"
data Column = Column data Column = Column
{ columnName :: !ColumnName { columnName :: !ColumnName
, columnType :: !ColumnType , columnType :: !ColumnType
, columnNullable :: !Nullable , columnNullable :: !Nullable
} deriving (Eq, Show) } deriving (Eq)
instance Show Column where
show Column {..} = "Column "
++ Text.unpack columnName ++ " "
++ Text.unpack columnType ++ " "
++ show columnNullable
data TableConstraint = PrimaryKey !ColumnName data TableConstraint = PrimaryKey !ColumnName
| UniqueKey ![ColumnName] | UniqueKey ![ColumnName]
| ForeignKey !TableName ![(ColumnName, ColumnName)] | ForeignKey !TableName ![(ColumnName, ColumnName)]
deriving (Eq, Show) deriving (Eq)
instance Show TableConstraint where
show (PrimaryKey col) = "PrimaryKey " ++ Text.unpack col
show (UniqueKey cols) = "UniqueKey " ++ showColNames cols
show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " "
++ Text.unpack tName ++ " " ++ showColNames (map snd colMap)
data Table = Table data Table = Table
{ tableName :: !TableName { tableName :: !TableName
, tableColumns :: ![Column] , tableColumns :: ![Column]
, tableConstraints :: ![TableConstraint] , tableConstraints :: ![TableConstraint]
} deriving (Eq, Show) } deriving (Eq)
instance Show Table where
show Table {..} =
unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints
data TimeUnit = Second | Minute | Hour | Day | Week data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show, Read) deriving (Eq, Enum, Show, Read)
@ -47,33 +77,59 @@ data Fact = Fact
, factTablePersistent :: !Bool , factTablePersistent :: !Bool
, factParentNames :: ![TableName] , factParentNames :: ![TableName]
, factColumns :: ![FactColumn] , factColumns :: ![FactColumn]
} deriving (Eq, Show) } deriving (Show)
data FactColumn = DimTime !ColumnName data FCTNone
| NoDimId !ColumnName data FCTTargetTable
| TenantId !ColumnName data FCTMaybeSourceColumn
| DimId !TableName !ColumnName data FCTSourceColumn
| DimVal !TableName !ColumnName
| FactCount !(Maybe ColumnName) !ColumnName data FactColumnType a where
| FactSum !ColumnName !ColumnName DimTime :: FactColumnType FCTNone
| FactAverage !ColumnName !ColumnName NoDimId :: FactColumnType FCTNone
| FactCountDistinct !(Maybe ColumnName) !ColumnName TenantId :: FactColumnType FCTNone
| FactMax !ColumnName !ColumnName DimId :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable
| FactMin !ColumnName !ColumnName DimVal :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable
deriving (Eq, Show) FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn
FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn
FactSum :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn
deriving instance Show (FactColumnType a)
pattern DimTimeV col <- FactColumn col DimTime
pattern NoDimIdV col <- FactColumn col NoDimId
pattern TenantIdV col <- FactColumn col TenantId
pattern DimIdV col <- FactColumn col DimId {..}
pattern DimValV col <- FactColumn col DimVal {..}
pattern FactCountV col <- FactColumn col FactCount {..}
pattern FactCountDistinctV col <- FactColumn col FactCountDistinct {..}
pattern FactSumV col <- FactColumn col FactSum {..}
pattern FactAverageV col <- FactColumn col FactAverage {..}
pattern FactMaxV col <- FactColumn col FactMax {..}
pattern FactMinV col <- FactColumn col FactMin {..}
data FactColumn = forall a. FactColumn
{ factColTargetColumn :: !ColumnName
, factColType :: FactColumnType a }
deriving instance Show FactColumn
factSourceColumnName :: FactColumn -> Maybe ColumnName factSourceColumnName :: FactColumn -> Maybe ColumnName
factSourceColumnName (DimTime cName) = Just cName factSourceColumnName FactColumn {..} = case factColType of
factSourceColumnName (NoDimId cName) = Just cName DimTime -> Just factColTargetColumn
factSourceColumnName (TenantId cName) = Just cName NoDimId -> Just factColTargetColumn
factSourceColumnName (DimId _ cName) = Just cName TenantId -> Just factColTargetColumn
factSourceColumnName (DimVal _ cName) = Just cName DimId {..} -> Just factColTargetColumn
factSourceColumnName (FactCount cName _) = cName DimVal {..} -> Just factColTargetColumn
factSourceColumnName (FactSum cName _) = Just cName FactCount {..} -> factColMaybeSourceColumn
factSourceColumnName (FactAverage cName _) = Just cName FactCountDistinct {..} -> factColMaybeSourceColumn
factSourceColumnName (FactCountDistinct cName _) = cName FactSum {..} -> Just factColSourceColumn
factSourceColumnName (FactMax cName _) = Just cName FactAverage {..} -> Just factColSourceColumn
factSourceColumnName (FactMin cName _) = Just cName FactMax {..} -> Just factColSourceColumn
FactMin {..} -> Just factColSourceColumn
data Settings = Settings data Settings = Settings
{ settingDimPrefix :: !Text { settingDimPrefix :: !Text
@ -127,7 +183,7 @@ data Env = Env
, envFacts :: ![Fact] , envFacts :: ![Fact]
, envSettings :: !Settings , envSettings :: !Settings
, envTypeDefaults :: !TypeDefaults , envTypeDefaults :: !TypeDefaults
} deriving (Eq, Show) } deriving (Show)
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show) data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
module Ringo.Utils where module Ringo.Utils where
import qualified Control.Arrow as Arrow import qualified Control.Arrow as Arrow

View File

@ -1,3 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
module Ringo.Validator module Ringo.Validator
( validateTable ( validateTable
, validateFact , validateFact
@ -48,17 +54,19 @@ validateFact Fact {..} = do
parentVs <- concat <$> mapM checkFactParents factParentNames parentVs <- concat <$> mapM checkFactParents factParentNames
let colVs = concatMap (checkColumn tables table) factColumns let colVs = concatMap (checkColumn tables table) factColumns
timeVs = [ MissingTimeColumn factTableName timeVs = [ MissingTimeColumn factTableName
| null [ c | DimTime c <- factColumns ] ] | null ([ cName | DimTimeV cName <- factColumns ] :: [ColumnName]) ]
notNullVs = [ MissingNotNullConstraint factTableName c notNullVs = [ MissingNotNullConstraint factTableName cName
| DimTime c <- factColumns | DimTimeV cName <- factColumns
, let col = findColumn c (tableColumns table) , let col = findColumn cName (tableColumns table)
, isJust col , isJust col
, columnNullable (fromJust col) == Null ] , columnNullable (fromJust col) == Null ]
typeDefaultVs = typeDefaultVs =
[ MissingTypeDefault cType [ MissingTypeDefault cType
| cName <- [ c | DimVal _ c <- factColumns ] | cName <- [ c | DimValV c <- factColumns ]
++ [ c | NoDimId c <- factColumns ] ++ [ c | NoDimIdV c <- factColumns ]
++ [ c | TenantId c <- factColumns ] ++ [ c | TenantIdV c <- factColumns ]
++ [ c | DimIdV c <- factColumns ]
, let col = findColumn cName (tableColumns table) , let col = findColumn cName (tableColumns table)
, isJust col , isJust col
, let cType = columnType $ fromJust col , let cType = columnType $ fromJust col
@ -76,6 +84,7 @@ validateFact Fact {..} = do
maybe [] (checkTableForCol table) (factSourceColumnName factCol) maybe [] (checkTableForCol table) (factSourceColumnName factCol)
++ checkColumnTable tables factCol ++ checkColumnTable tables factCol
checkColumnTable tables factCol = case factCol of checkColumnTable :: [Table] -> FactColumn -> [ValidationError]
DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables checkColumnTable tables FactColumn {..} = case factColType of
_ -> [] DimId {factColTargetTable = tName} -> maybe [ MissingTable tName ] (const []) $ findTable tName tables
_ -> []

View File

@ -1,7 +1,7 @@
# For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md
# Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2)
resolver: lts-3.20 resolver: lts-5.1
# Local packages, usually specified by relative directory name # Local packages, usually specified by relative directory name
packages: packages:

View File

@ -1,2 +0,0 @@
main :: IO ()
main = putStrLn "Test suite not yet implemented"

2
test/doctests.hs Normal file
View File

@ -0,0 +1,2 @@
import Test.DocTest
main = doctest ["-isrc", "Ringo"]