|
|
|
@ -1,5 +1,10 @@ |
|
|
|
|
{-# LANGUAGE OverloadedStrings #-} |
|
|
|
|
module Ringo |
|
|
|
|
( module Ringo.Types |
|
|
|
|
( -- | The examples in this module assume the following code has been run. |
|
|
|
|
-- The :{ and :} will only work in GHCi. |
|
|
|
|
|
|
|
|
|
-- $setup |
|
|
|
|
module Ringo.Types |
|
|
|
|
, extractFactTable |
|
|
|
|
, extractDimensionTables |
|
|
|
|
, extractDependencies |
|
|
|
@ -19,31 +24,608 @@ import qualified Ringo.Extractor as E |
|
|
|
|
import qualified Ringo.Generator as G |
|
|
|
|
import qualified Ringo.Validator as V |
|
|
|
|
|
|
|
|
|
-- $setup |
|
|
|
|
-- >>> :set -XOverloadedStrings |
|
|
|
|
-- >>> import Ringo |
|
|
|
|
-- >>> import qualified Data.Map as Map |
|
|
|
|
-- >>> import qualified Data.Text as Text |
|
|
|
|
-- >>> import Data.List (nub) |
|
|
|
|
-- >>> import Text.Show.Pretty |
|
|
|
|
-- >>> :{ |
|
|
|
|
--let sessionEventsTable = |
|
|
|
|
-- Table { tableName = "session_events" |
|
|
|
|
-- , tableColumns = |
|
|
|
|
-- [ Column "id" "uuid" NotNull |
|
|
|
|
-- , Column "created_at" "timestamp without time zone" NotNull |
|
|
|
|
-- , Column "member_id" "integer" Null |
|
|
|
|
-- , Column "publisher_id" "integer" NotNull |
|
|
|
|
-- , Column "user_agent" "character varying(1024)" Null |
|
|
|
|
-- , Column "browser_name" "character varying(50)" Null |
|
|
|
|
-- , Column "os" "character varying(50)" Null |
|
|
|
|
-- , Column "geo_country_name" "character varying(50)" Null |
|
|
|
|
-- , Column "geo_city_name" "character varying(50)" Null |
|
|
|
|
-- , Column "geo_continent_name" "character varying(15)" Null |
|
|
|
|
-- , Column "geo_most_specific_subdivision_name" "character varying(100)" Null |
|
|
|
|
-- , Column "geo_longitude" "numeric(9,6)" Null |
|
|
|
|
-- , Column "geo_latitude" "numeric(9,6)" Null |
|
|
|
|
-- , Column "geo_time_zone" "character varying(20)" Null |
|
|
|
|
-- , Column "geo_postal_code" "character varying(20)" Null |
|
|
|
|
-- , Column "user_agent_name" "character varying(100)" Null |
|
|
|
|
-- , Column "user_agent_type" "character varying(15)" Null |
|
|
|
|
-- , Column "user_agent_version" "character varying(100)" Null |
|
|
|
|
-- , Column "user_agent_device" "character varying(15)" Null |
|
|
|
|
-- ] |
|
|
|
|
-- , tableConstraints = [ PrimaryKey "id" ] |
|
|
|
|
-- } |
|
|
|
|
-- sessionFact = |
|
|
|
|
-- Fact { factName = "session" |
|
|
|
|
-- , factTableName = "session_events" |
|
|
|
|
-- , factTablePersistent = True |
|
|
|
|
-- , factParentNames = [] |
|
|
|
|
-- , factColumns = |
|
|
|
|
-- [ FactColumn "created_at" $ DimTime |
|
|
|
|
-- , FactColumn "publisher_id" $ NoDimId |
|
|
|
|
-- , FactColumn "browser_name" $ DimVal "user_agent" |
|
|
|
|
-- , FactColumn "os" $ DimVal "user_agent" |
|
|
|
|
-- , FactColumn "user_agent_name" $ DimVal "user_agent" |
|
|
|
|
-- , FactColumn "geo_country_name" $ DimVal "geo" |
|
|
|
|
-- , FactColumn "geo_city_name" $ DimVal "geo" |
|
|
|
|
-- , FactColumn "geo_continent_name" $ DimVal "geo" |
|
|
|
|
-- , FactColumn "session_count" $ FactCount Nothing |
|
|
|
|
-- ] |
|
|
|
|
-- } |
|
|
|
|
-- pageViewEventsTable = |
|
|
|
|
-- Table { tableName = "page_view_events" |
|
|
|
|
-- , tableColumns = |
|
|
|
|
-- [ Column "id" "uuid" NotNull |
|
|
|
|
-- , Column "created_at" "timestamp without time zone" NotNull |
|
|
|
|
-- , Column "member_id" "integer" Null |
|
|
|
|
-- , Column "publisher_id" "integer" NotNull |
|
|
|
|
-- , Column "device_tracker_id" "uuid" Null |
|
|
|
|
-- , Column "session_event_id" "uuid" Null |
|
|
|
|
-- , Column "page_type" "character varying(20)" NotNull |
|
|
|
|
-- , Column "referrer" "character varying(1024)" Null |
|
|
|
|
-- , Column "url" "character varying(1024)" Null |
|
|
|
|
-- , Column "referrer_id" "integer" Null |
|
|
|
|
-- ] |
|
|
|
|
-- , tableConstraints = |
|
|
|
|
-- [ PrimaryKey "id" |
|
|
|
|
-- , ForeignKey "session_events" [ ("session_event_id", "id") |
|
|
|
|
-- , ("publisher_id", "publisher_id") |
|
|
|
|
-- ] |
|
|
|
|
-- , ForeignKey "referrers" [ ("referrer_id", "id") |
|
|
|
|
-- , ("publisher_id", "publisher_id") |
|
|
|
|
-- ] |
|
|
|
|
-- ] |
|
|
|
|
-- } |
|
|
|
|
-- pageViewFact = |
|
|
|
|
-- Fact { factName = "page_view" |
|
|
|
|
-- , factTableName = "page_view_events" |
|
|
|
|
-- , factTablePersistent = True |
|
|
|
|
-- , factParentNames = [ "session" ] |
|
|
|
|
-- , factColumns = |
|
|
|
|
-- [ FactColumn "created_at" $ DimTime |
|
|
|
|
-- , FactColumn "publisher_id" $ NoDimId |
|
|
|
|
-- , FactColumn "page_type" $ DimVal "page_type" |
|
|
|
|
-- , FactColumn "referrer_id" $ DimId "referrers" |
|
|
|
|
-- , FactColumn "view_count" $ FactCount Nothing |
|
|
|
|
-- ] |
|
|
|
|
-- } |
|
|
|
|
-- referrersTable = |
|
|
|
|
-- Table { tableName = "referrers" |
|
|
|
|
-- , tableColumns = |
|
|
|
|
-- [ Column "id" "integer" NotNull |
|
|
|
|
-- , Column "publisher_id" "integer" NotNull |
|
|
|
|
-- , Column "name" "character varying(100)" NotNull |
|
|
|
|
-- ] |
|
|
|
|
-- , tableConstraints = |
|
|
|
|
-- [ PrimaryKey "id" |
|
|
|
|
-- , UniqueKey ["publisher_id", "name"] |
|
|
|
|
-- ] |
|
|
|
|
-- } |
|
|
|
|
-- tables = [sessionEventsTable, pageViewEventsTable, referrersTable] |
|
|
|
|
-- facts = [sessionFact, pageViewFact] |
|
|
|
|
-- typeDefaults = Map.fromList [ ("integer", "-1") |
|
|
|
|
-- , ("timestamp", "'00-00-00 00:00:00'") |
|
|
|
|
-- , ("character", "'__UNKNOWN_VAL__'") |
|
|
|
|
-- , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid") |
|
|
|
|
-- , ("boolean", "false") |
|
|
|
|
-- , ("json", "'{}'::json") |
|
|
|
|
-- , ("numeric", "-1") |
|
|
|
|
-- , ("text", "'__UNKNOWN_VAL__'") |
|
|
|
|
-- ] |
|
|
|
|
-- settings = defSettings { settingTableNameSuffixTemplate = "" } |
|
|
|
|
-- env = Env tables facts settings typeDefaults |
|
|
|
|
-- :} |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> print $ extractFactTable env sessionFact |
|
|
|
|
-- Table fact_session_by_minute |
|
|
|
|
-- Column created_at_minute_id bigint NOT NULL |
|
|
|
|
-- Column publisher_id integer NOT NULL |
|
|
|
|
-- Column session_count integer NOT NULL |
|
|
|
|
-- Column geo_id integer NOT NULL |
|
|
|
|
-- Column user_agent_id integer NOT NULL |
|
|
|
|
-- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id) |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> print $ extractFactTable env pageViewFact |
|
|
|
|
-- Table fact_page_view_by_minute |
|
|
|
|
-- Column created_at_minute_id bigint NOT NULL |
|
|
|
|
-- Column publisher_id integer NOT NULL |
|
|
|
|
-- Column view_count integer NOT NULL |
|
|
|
|
-- Column referrer_id integer NOT NULL |
|
|
|
|
-- Column page_type_id integer NOT NULL |
|
|
|
|
-- Column geo_id integer NOT NULL |
|
|
|
|
-- Column user_agent_id integer NOT NULL |
|
|
|
|
-- UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id) |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
extractFactTable :: Env -> Fact -> Table |
|
|
|
|
extractFactTable env = flip runReader env . E.extractFactTable |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> mapM_ print $ extractDimensionTables env sessionFact |
|
|
|
|
-- Table dim_geo |
|
|
|
|
-- Column id serial NOT NULL |
|
|
|
|
-- Column country_name character varying(50) NOT NULL |
|
|
|
|
-- Column city_name character varying(50) NOT NULL |
|
|
|
|
-- Column continent_name character varying(15) NOT NULL |
|
|
|
|
-- PrimaryKey id |
|
|
|
|
-- UniqueKey (country_name, city_name, continent_name) |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- Table dim_user_agent |
|
|
|
|
-- Column id serial NOT NULL |
|
|
|
|
-- Column browser_name character varying(50) NOT NULL |
|
|
|
|
-- Column os character varying(50) NOT NULL |
|
|
|
|
-- Column name character varying(100) NOT NULL |
|
|
|
|
-- PrimaryKey id |
|
|
|
|
-- UniqueKey (browser_name, os, name) |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> mapM_ print . filter (`notElem` tables) $ extractDimensionTables env pageViewFact |
|
|
|
|
-- Table dim_page_type |
|
|
|
|
-- Column id serial NOT NULL |
|
|
|
|
-- Column page_type character varying(20) NOT NULL |
|
|
|
|
-- PrimaryKey id |
|
|
|
|
-- UniqueKey (page_type) |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
extractDimensionTables :: Env -> Fact -> [Table] |
|
|
|
|
extractDimensionTables env = flip runReader env . E.extractDimensionTables |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> putStrLn . ppShow $ extractDependencies env sessionFact |
|
|
|
|
-- fromList |
|
|
|
|
-- [ ( "dim_geo" , [ "session_events" ] ) |
|
|
|
|
-- , ( "dim_user_agent" , [ "session_events" ] ) |
|
|
|
|
-- , ( "fact_session_by_minute" |
|
|
|
|
-- , [ "session_events" , "dim_user_agent" , "dim_geo" ] |
|
|
|
|
-- ) |
|
|
|
|
-- ] |
|
|
|
|
-- >>> putStrLn . ppShow $ extractDependencies env pageViewFact |
|
|
|
|
-- fromList |
|
|
|
|
-- [ ( "dim_page_type" , [ "page_view_events" ] ) |
|
|
|
|
-- , ( "fact_page_view_by_minute" |
|
|
|
|
-- , [ "page_view_events" |
|
|
|
|
-- , "session_events" |
|
|
|
|
-- , "dim_page_type" |
|
|
|
|
-- , "referrers" |
|
|
|
|
-- , "dim_user_agent" |
|
|
|
|
-- , "dim_geo" |
|
|
|
|
-- ] |
|
|
|
|
-- ) |
|
|
|
|
-- ] |
|
|
|
|
extractDependencies :: Env -> Fact -> Dependencies |
|
|
|
|
extractDependencies env = flip runReader env . E.extractDependencies |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables env) $ facts |
|
|
|
|
-- >>> let sqls = map (dimensionTableDefnSQL env) dimTables |
|
|
|
|
-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls |
|
|
|
|
-- create table dim_geo ( |
|
|
|
|
-- id serial not null, |
|
|
|
|
-- country_name character varying(50) not null, |
|
|
|
|
-- city_name character varying(50) not null, |
|
|
|
|
-- continent_name character varying(15) not null |
|
|
|
|
-- ) |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_geo add primary key (id); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_geo add unique (country_name, |
|
|
|
|
-- city_name, |
|
|
|
|
-- continent_name); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- create index on dim_geo (country_name) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on dim_geo (city_name) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on dim_geo (continent_name) |
|
|
|
|
-- ; |
|
|
|
|
-- -------- |
|
|
|
|
-- create table dim_user_agent ( |
|
|
|
|
-- id serial not null, |
|
|
|
|
-- browser_name character varying(50) not null, |
|
|
|
|
-- os character varying(50) not null, |
|
|
|
|
-- name character varying(100) not null |
|
|
|
|
-- ) |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_user_agent add primary key (id); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_user_agent add unique (browser_name, os, name); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- create index on dim_user_agent (browser_name) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on dim_user_agent (os) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on dim_user_agent (name) |
|
|
|
|
-- ; |
|
|
|
|
-- -------- |
|
|
|
|
-- create table dim_page_type ( |
|
|
|
|
-- id serial not null, |
|
|
|
|
-- page_type character varying(20) not null |
|
|
|
|
-- ) |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_page_type add primary key (id); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table dim_page_type add unique (page_type); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- -------- |
|
|
|
|
dimensionTableDefnSQL :: Env -> Table -> [Text] |
|
|
|
|
dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> let storySessionFactTable = extractFactTable env sessionFact |
|
|
|
|
-- >>> let sqls = factTableDefnSQL env sessionFact storySessionFactTable |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- create table fact_session_by_minute ( |
|
|
|
|
-- created_at_minute_id bigint not null, |
|
|
|
|
-- publisher_id integer not null, |
|
|
|
|
-- session_count integer not null, |
|
|
|
|
-- geo_id integer not null, |
|
|
|
|
-- user_agent_id integer not null |
|
|
|
|
-- ) |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table fact_session_by_minute add unique (created_at_minute_id, |
|
|
|
|
-- publisher_id, |
|
|
|
|
-- geo_id, |
|
|
|
|
-- user_agent_id); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- create index on fact_session_by_minute (created_at_minute_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_session_by_minute (publisher_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_session_by_minute (geo_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_session_by_minute (user_agent_id) |
|
|
|
|
-- ; |
|
|
|
|
-- >>> let pageViewFactTable = extractFactTable env pageViewFact |
|
|
|
|
-- >>> let sqls = factTableDefnSQL env pageViewFact pageViewFactTable |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- create table fact_page_view_by_minute ( |
|
|
|
|
-- created_at_minute_id bigint not null, |
|
|
|
|
-- publisher_id integer not null, |
|
|
|
|
-- view_count integer not null, |
|
|
|
|
-- referrer_id integer not null, |
|
|
|
|
-- page_type_id integer not null, |
|
|
|
|
-- geo_id integer not null, |
|
|
|
|
-- user_agent_id integer not null |
|
|
|
|
-- ) |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- alter table fact_page_view_by_minute add unique (created_at_minute_id, |
|
|
|
|
-- publisher_id, |
|
|
|
|
-- referrer_id, |
|
|
|
|
-- page_type_id, |
|
|
|
|
-- geo_id, |
|
|
|
|
-- user_agent_id); |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- create index on fact_page_view_by_minute (created_at_minute_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_page_view_by_minute (publisher_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_page_view_by_minute (referrer_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_page_view_by_minute (page_type_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_page_view_by_minute (geo_id) |
|
|
|
|
-- ; |
|
|
|
|
-- create index on fact_page_view_by_minute (user_agent_id) |
|
|
|
|
-- ; |
|
|
|
|
factTableDefnSQL :: Env -> Fact -> Table -> [Text] |
|
|
|
|
factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact |
|
|
|
|
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into dim_geo (country_name, city_name, continent_name) |
|
|
|
|
-- select distinct |
|
|
|
|
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, |
|
|
|
|
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, |
|
|
|
|
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) |
|
|
|
|
-- and |
|
|
|
|
-- created_at < ? |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- insert into dim_user_agent (browser_name, os, name) |
|
|
|
|
-- select distinct |
|
|
|
|
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, |
|
|
|
|
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, |
|
|
|
|
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- (browser_name is not null or os is not null or user_agent_name is not null) |
|
|
|
|
-- and |
|
|
|
|
-- created_at < ? |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into dim_geo (country_name, city_name, continent_name) |
|
|
|
|
-- select |
|
|
|
|
-- x.* |
|
|
|
|
-- from |
|
|
|
|
-- (select distinct |
|
|
|
|
-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, |
|
|
|
|
-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, |
|
|
|
|
-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) |
|
|
|
|
-- and |
|
|
|
|
-- created_at < ? |
|
|
|
|
-- and |
|
|
|
|
-- created_at >= ?) as x |
|
|
|
|
-- left outer join |
|
|
|
|
-- dim_geo |
|
|
|
|
-- on dim_geo.country_name = x.geo_country_name |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.city_name = x.geo_city_name |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.continent_name = x.geo_continent_name |
|
|
|
|
-- where |
|
|
|
|
-- dim_geo.country_name is null and dim_geo.city_name is null |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.continent_name is null |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- insert into dim_user_agent (browser_name, os, name) |
|
|
|
|
-- select |
|
|
|
|
-- x.* |
|
|
|
|
-- from |
|
|
|
|
-- (select distinct |
|
|
|
|
-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, |
|
|
|
|
-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, |
|
|
|
|
-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- (browser_name is not null or os is not null or user_agent_name is not null) |
|
|
|
|
-- and |
|
|
|
|
-- created_at < ? |
|
|
|
|
-- and |
|
|
|
|
-- created_at >= ?) as x |
|
|
|
|
-- left outer join |
|
|
|
|
-- dim_user_agent |
|
|
|
|
-- on dim_user_agent.browser_name = x.browser_name |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.os = x.os |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.name = x.user_agent_name |
|
|
|
|
-- where |
|
|
|
|
-- dim_user_agent.browser_name is null and dim_user_agent.os is null |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.name is null |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables env pageViewFact |
|
|
|
|
-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env pageViewFact) pageViewDimTableNames |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into dim_page_type (page_type) |
|
|
|
|
-- select distinct |
|
|
|
|
-- page_view_events.page_type as page_type |
|
|
|
|
-- from |
|
|
|
|
-- page_view_events |
|
|
|
|
-- where |
|
|
|
|
-- (page_type is not null) and created_at < ? |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env pageViewFact) pageViewDimTableNames |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into dim_page_type (page_type) |
|
|
|
|
-- select |
|
|
|
|
-- x.* |
|
|
|
|
-- from |
|
|
|
|
-- (select distinct |
|
|
|
|
-- page_view_events.page_type as page_type |
|
|
|
|
-- from |
|
|
|
|
-- page_view_events |
|
|
|
|
-- where |
|
|
|
|
-- (page_type is not null) and created_at < ? |
|
|
|
|
-- and |
|
|
|
|
-- created_at >= ?) as x |
|
|
|
|
-- left outer join |
|
|
|
|
-- dim_page_type |
|
|
|
|
-- on dim_page_type.page_type = x.page_type |
|
|
|
|
-- where |
|
|
|
|
-- dim_page_type.page_type is null |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text |
|
|
|
|
dimensionTablePopulateSQL popMode env fact = |
|
|
|
|
flip runReader env . G.dimensionTablePopulateSQL popMode fact |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> let sqls = factTablePopulateSQL FullPopulation env sessionFact |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into fact_session_by_minute (created_at_minute_id, |
|
|
|
|
-- publisher_id, |
|
|
|
|
-- session_count, |
|
|
|
|
-- geo_id, |
|
|
|
|
-- user_agent_id) |
|
|
|
|
-- select |
|
|
|
|
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, |
|
|
|
|
-- session_events.publisher_id as xxff_publisher_id, |
|
|
|
|
-- count(*) as xxff_session_count, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_geo as dim_geo |
|
|
|
|
-- where |
|
|
|
|
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_user_agent as dim_user_agent |
|
|
|
|
-- where |
|
|
|
|
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- session_events.created_at < ? |
|
|
|
|
-- group by |
|
|
|
|
-- xxff_created_at_minute_id, |
|
|
|
|
-- xxff_publisher_id, |
|
|
|
|
-- xxff_geo_id, |
|
|
|
|
-- xxff_user_agent_id |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> let sqls = factTablePopulateSQL IncrementalPopulation env sessionFact |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into fact_session_by_minute (created_at_minute_id, |
|
|
|
|
-- publisher_id, |
|
|
|
|
-- session_count, |
|
|
|
|
-- geo_id, |
|
|
|
|
-- user_agent_id) |
|
|
|
|
-- select |
|
|
|
|
-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, |
|
|
|
|
-- session_events.publisher_id as xxff_publisher_id, |
|
|
|
|
-- count(*) as xxff_session_count, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_geo as dim_geo |
|
|
|
|
-- where |
|
|
|
|
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_user_agent as dim_user_agent |
|
|
|
|
-- where |
|
|
|
|
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id |
|
|
|
|
-- from |
|
|
|
|
-- session_events |
|
|
|
|
-- where |
|
|
|
|
-- session_events.created_at < ? and session_events.created_at >= ? |
|
|
|
|
-- group by |
|
|
|
|
-- xxff_created_at_minute_id, |
|
|
|
|
-- xxff_publisher_id, |
|
|
|
|
-- xxff_geo_id, |
|
|
|
|
-- xxff_user_agent_id |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
-- >>> let sqls = factTablePopulateSQL FullPopulation env pageViewFact |
|
|
|
|
-- >>> mapM_ (putStr . Text.unpack) sqls |
|
|
|
|
-- insert into fact_page_view_by_minute (created_at_minute_id, |
|
|
|
|
-- publisher_id, |
|
|
|
|
-- view_count, |
|
|
|
|
-- referrer_id, |
|
|
|
|
-- page_type_id, |
|
|
|
|
-- geo_id, |
|
|
|
|
-- user_agent_id) |
|
|
|
|
-- select |
|
|
|
|
-- cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, |
|
|
|
|
-- page_view_events.publisher_id as xxff_publisher_id, |
|
|
|
|
-- count(*) as xxff_view_count, |
|
|
|
|
-- coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_page_type as dim_page_type |
|
|
|
|
-- where |
|
|
|
|
-- dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_geo as dim_geo |
|
|
|
|
-- where |
|
|
|
|
-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, |
|
|
|
|
-- coalesce((select |
|
|
|
|
-- id |
|
|
|
|
-- from |
|
|
|
|
-- dim_user_agent as dim_user_agent |
|
|
|
|
-- where |
|
|
|
|
-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') |
|
|
|
|
-- and |
|
|
|
|
-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id |
|
|
|
|
-- from |
|
|
|
|
-- page_view_events |
|
|
|
|
-- left outer join |
|
|
|
|
-- session_events |
|
|
|
|
-- on page_view_events.session_event_id = session_events.id |
|
|
|
|
-- and |
|
|
|
|
-- page_view_events.publisher_id = session_events.publisher_id |
|
|
|
|
-- where |
|
|
|
|
-- page_view_events.created_at < ? |
|
|
|
|
-- group by |
|
|
|
|
-- xxff_created_at_minute_id, |
|
|
|
|
-- xxff_publisher_id, |
|
|
|
|
-- xxff_referrer_id, |
|
|
|
|
-- xxff_page_type_id, |
|
|
|
|
-- xxff_geo_id, |
|
|
|
|
-- xxff_user_agent_id |
|
|
|
|
-- ; |
|
|
|
|
-- <BLANKLINE> |
|
|
|
|
factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text] |
|
|
|
|
factTablePopulateSQL popMode env = |
|
|
|
|
flip runReader env . G.factTablePopulateSQL popMode |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> concatMap (validateTable env) tables |
|
|
|
|
-- [] |
|
|
|
|
validateTable :: Env -> Table -> [ValidationError] |
|
|
|
|
validateTable env = flip runReader env . V.validateTable |
|
|
|
|
|
|
|
|
|
-- | |
|
|
|
|
-- |
|
|
|
|
-- >>> concatMap (validateFact env) facts |
|
|
|
|
-- [] |
|
|
|
|
validateFact :: Env -> Fact -> [ValidationError] |
|
|
|
|
validateFact env = flip runReader env . V.validateFact |
|
|
|
|