From 28ff8a99fbce406d430e96487e827bd964ffa8a3 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Fri, 1 Jan 2016 17:15:22 +0530 Subject: [PATCH 01/10] Changes Create generator to use hssqlppp internally. --- app/Main.hs | 3 +- ringo.cabal | 1 + src/Ringo/Generator/Create.hs | 55 ++++++++++++++++++++--------------- src/Ringo/Generator/Sql.hs | 27 +++++++++++++++++ 4 files changed, 61 insertions(+), 25 deletions(-) create mode 100644 src/Ringo/Generator/Sql.hs diff --git a/app/Main.hs b/app/Main.hs index d475181..c2122c2 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -7,7 +7,6 @@ import qualified Data.Text as Text import Data.Aeson (encode) import Data.Char (toLower) import Data.List (nub) -import Data.Monoid ((<>)) import Control.Monad (forM_) import System.Directory (createDirectoryIfMissing) import System.FilePath ((), (<.>)) @@ -80,4 +79,4 @@ writeFiles outputDir env@Env{..} = do , factTablePopulateSQLs IncRefresh $ factTablePopulateSQL IncrementalPopulation ] - sqlStr s = Text.unpack $ s <> ";\n" + sqlStr = Text.unpack diff --git a/ringo.cabal b/ringo.cabal index dc2d060..f75207d 100644 --- a/ringo.cabal +++ b/ringo.cabal @@ -22,6 +22,7 @@ library Ringo.Validator, Ringo.Extractor.Internal, Ringo.Generator.Internal, + Ringo.Generator.Sql, Ringo.Generator.Create, Ringo.Generator.Populate.Dimension, Ringo.Generator.Populate.Fact, diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index cd8abf1..adf791e 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -5,43 +5,54 @@ module Ringo.Generator.Create (tableDefnSQL, factTableDefnSQL) where import Control.Applicative ((<$>)) #endif -import Control.Monad.Reader (Reader, asks) -import Data.Monoid ((<>)) -import Data.Text (Text) +import Control.Monad.Reader (Reader, asks) +import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..) + , AlterTableOperation(..), Constraint(..), Cascade(..) + ) +import Data.Monoid ((<>)) +import Data.Text (Text) import Ringo.Extractor.Internal -import Ringo.Generator.Internal +import Ringo.Generator.Sql import Ringo.Types import Ringo.Utils tableDefnSQL :: Table -> Reader Env [Text] -tableDefnSQL Table {..} = do +tableDefnSQL table = map ppSQL <$> tableDefnSQL' table + +tableDefnSQL' :: Table -> Reader Env [Statement] +tableDefnSQL' Table {..} = do Settings {..} <- asks envSettings let tabName = tableName <> settingTableNameSuffixTemplate - tableSQL = "CREATE TABLE " <> tabName <> " (\n" - <> (joinColumnNames . map columnDefnSQL $ tableColumns) - <> "\n)" + tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing columnDefnSQL Column {..} = - columnName <> " " <> columnType <> " " <> nullableDefnSQL columnNullable + att columnName columnType $ nullableDefnSQL columnNullable - nullableDefnSQL Null = "NULL" - nullableDefnSQL NotNull = "NOT NULL" + nullableDefnSQL Null = NullConstraint ea "" + nullableDefnSQL NotNull = NotNullConstraint ea "" constraintDefnSQL constraint = - let alterTableSQL = "ALTER TABLE ONLY " <> tabName <> " ADD " - in case constraint of - PrimaryKey cName -> [ alterTableSQL <> "PRIMARY KEY (" <> cName <> ")" ] - ForeignKey oTableName cNamePairs -> - [ alterTableSQL <> "FOREIGN KEY (" <> joinColumnNames (map fst cNamePairs) <> ") REFERENCES " - <> oTableName <> " (" <> joinColumnNames (map snd cNamePairs) <> ")" ] - UniqueKey cNames -> ["CREATE UNIQUE INDEX ON " <> tabName <> " (" <> joinColumnNames cNames <> ")"] + let constr = case constraint of + PrimaryKey cName -> PrimaryKeyConstraint ea "" [nmc cName] + ForeignKey oTableName cNamePairs -> + ReferenceConstraint ea "" (map (nmc . fst) cNamePairs) + (name oTableName) (map (nmc . snd) cNamePairs) Restrict Restrict + UniqueKey cNames -> UniqueConstraint ea "" $ map nmc cNames - return $ tableSQL : concatMap constraintDefnSQL tableConstraints + in AlterTable ea (name tabName) $ AlterTableActions ea [AddConstraint ea constr] + + return $ tableSQL : map constraintDefnSQL tableConstraints factTableDefnSQL :: Fact -> Table -> Reader Env [Text] factTableDefnSQL fact table = do + ds <- map ppSQL <$> tableDefnSQL' table + is <- map (\st -> ppSQL st <> ";\n") <$> factTableIndexSQL' fact table + return $ ds ++ is + +factTableIndexSQL' :: Fact -> Table -> Reader Env [Statement] +factTableIndexSQL' fact table = do Settings {..} <- asks envSettings allDims <- extractAllDimensionTables fact @@ -53,8 +64,6 @@ factTableDefnSQL fact table = do dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName | (_, Table {..}) <- allDims ] - indexSQLs = [ "CREATE INDEX ON " <> tableName table <> settingTableNameSuffixTemplate - <> " USING btree (" <> col <> ")" - | col <- factCols ++ dimCols ] + return [ CreateIndexTSQL ea (nmc "") (name $ tableName table <> settingTableNameSuffixTemplate) [nmc col] + | col <- factCols ++ dimCols ] - (++ indexSQLs) <$> tableDefnSQL table diff --git a/src/Ringo/Generator/Sql.hs b/src/Ringo/Generator/Sql.hs new file mode 100644 index 0000000..1c0692b --- /dev/null +++ b/src/Ringo/Generator/Sql.hs @@ -0,0 +1,27 @@ +module Ringo.Generator.Sql where + +import qualified Data.Text as Text +import qualified Data.Text.Lazy as TL + + +import Database.HsSqlPpp.Annotation +import Database.HsSqlPpp.Dialect (postgresDialect) +import Database.HsSqlPpp.Pretty +import Database.HsSqlPpp.Syntax +import Data.Text (Text) + +ea :: Annotation +ea = emptyAnnotation + +name :: Text -> Name +name n = Name ea [nmc n] + +nmc :: Text -> NameComponent +nmc = Nmc . Text.unpack + +att :: Text -> Text -> RowConstraint -> AttributeDef +att nam typ constr = + AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr] + +ppSQL :: Statement -> Text +ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] From c021ffb845b24615c830de420465c96033f047b4 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Fri, 1 Jan 2016 20:57:54 +0530 Subject: [PATCH 02/10] Adds doctests. --- app/Main.hs | 2 + app/Ringo/ArgParser.hs | 2 + app/Ringo/InputParser.hs | 3 + ringo.cabal | 10 +- src/Ringo.hs | 190 +++++++++++++++++++++- src/Ringo/Extractor.hs | 2 + src/Ringo/Extractor/Internal.hs | 4 + src/Ringo/Generator/Create.hs | 3 + src/Ringo/Generator/Internal.hs | 2 + src/Ringo/Generator/Populate/Dimension.hs | 3 + src/Ringo/Generator/Populate/Fact.hs | 5 + src/Ringo/Generator/Sql.hs | 1 - src/Ringo/Types.hs | 39 ++++- src/Ringo/Utils.hs | 3 + src/Ringo/Validator.hs | 3 + test/Spec.hs | 2 - test/doctests.hs | 2 + 17 files changed, 260 insertions(+), 16 deletions(-) delete mode 100644 test/Spec.hs create mode 100644 test/doctests.hs diff --git a/app/Main.hs b/app/Main.hs index c2122c2..079ee28 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Main where import qualified Data.ByteString.Lazy as BS diff --git a/app/Ringo/ArgParser.hs b/app/Ringo/ArgParser.hs index fe7726a..7bdfada 100644 --- a/app/Ringo/ArgParser.hs +++ b/app/Ringo/ArgParser.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Ringo.ArgParser (ProgArgs(..), parseArgs) where import qualified Data.Text as Text diff --git a/app/Ringo/InputParser.hs b/app/Ringo/InputParser.hs index 0ae0912..6182bfe 100644 --- a/app/Ringo/InputParser.hs +++ b/app/Ringo/InputParser.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE CPP #-} module Ringo.InputParser (parseInput) where import qualified Data.Text as Text diff --git a/ringo.cabal b/ringo.cabal index f75207d..109c8cf 100644 --- a/ringo.cabal +++ b/ringo.cabal @@ -34,8 +34,6 @@ library raw-strings-qq >=1.0 && <1.2, hssqlppp ==0.5.23 ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 - default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns, - TupleSections, CPP, NamedFieldPuns default-language: Haskell2010 executable ringo @@ -55,16 +53,16 @@ executable ringo aeson >=0.8 && <0.11, ringo ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 - default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns, - TupleSections, CPP, NamedFieldPuns default-language: Haskell2010 test-suite ringo-test type: exitcode-stdio-1.0 - hs-source-dirs: test - main-is: Spec.hs + hs-source-dirs: test, src + main-is: doctests.hs build-depends: base , ringo + , pretty-show >=1.6 && <1.7 + , doctest >=0.9 && <0.11 default-language: Haskell2010 source-repository head diff --git a/src/Ringo.hs b/src/Ringo.hs index 9306bf7..2d8a3c2 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -1,5 +1,11 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Ringo - ( module Ringo.Types + ( -- | The examples in this module assume the following code has been run. + -- The :{ and :} will only work in GHCi. + + -- $setup + module Ringo.Types , extractFactTable , extractDimensionTables , extractDependencies @@ -19,18 +25,200 @@ import qualified Ringo.Extractor as E import qualified Ringo.Generator as G import qualified Ringo.Validator as V +-- $setup +-- >>> :set -XOverloadedStrings +-- >>> import Ringo +-- >>> import qualified Data.Map as Map +-- >>> import qualified Data.Text as Text +-- >>> import Text.Show.Pretty +-- >>> :{ +--let sessionEventsTable = +-- Table { tableName = "session_events" +-- , tableColumns = +-- [ Column "id" "uuid" NotNull +-- , Column "created_at" "timestamp without time zone" Null +-- , Column "member_id" "integer" Null +-- , Column "publisher_id" "integer" NotNull +-- , Column "user_agent" "character varying(1024)" Null +-- , Column "browser_name" "character varying(50)" Null +-- , Column "os" "character varying(50)" Null +-- , Column "geo_country_name" "character varying(50)" Null +-- , Column "geo_city_name" "character varying(50)" Null +-- , Column "geo_continent_name" "character varying(15)" Null +-- , Column "geo_most_specific_subdivision_name" "character varying(100)" Null +-- , Column "geo_longitude" "numeric(9,6)" Null +-- , Column "geo_latitude" "numeric(9,6)" Null +-- , Column "geo_time_zone" "character varying(20)" Null +-- , Column "geo_postal_code" "character varying(20)" Null +-- , Column "user_agent_name" "character varying(100)" Null +-- , Column "user_agent_type" "character varying(15)" Null +-- , Column "user_agent_version" "character varying(100)" Null +-- , Column "user_agent_device" "character varying(15)" Null +-- ] +-- , tableConstraints = +-- [ PrimaryKey "id" ] +-- } +-- sessionFact = +-- Fact { factName = "session" +-- , factTableName = "session_events" +-- , factParentNames = [] +-- , factColumns = +-- [ DimTime "created_at" +-- , NoDimId "publisher_id" +-- , DimVal "user_agent" "browser_name" +-- , DimVal "user_agent" "os" +-- , DimVal "user_agent" "user_agent_name" +-- , DimVal "user_agent" "user_agent_type" +-- , DimVal "user_agent" "user_agent_device" +-- , DimVal "geo" "geo_country_name" +-- , DimVal "geo" "geo_city_name" +-- , DimVal "geo" "geo_continent_name" +-- , DimVal "geo" "geo_most_specific_subdivision_name" +-- , DimVal "geo" "geo_time_zone" +-- , FactCount Nothing "session_count" +-- ] +-- } +-- tables = [sessionEventsTable] +-- facts = [sessionFact] +-- typeDefaults = Map.fromList [ ("integer", "-1") +-- , ("timestamp", "'00-00-00 00:00:00'") +-- , ("character", "'__UNKNOWN_VAL__'") +-- , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid") +-- , ("boolean", "false") +-- , ("json", "'{}'::json") +-- , ("numeric", "-1") +-- , ("text", "'__UNKNOWN_VAL__'") +-- ] +-- settings = defSettings { settingTableNameSuffixTemplate = "" } +-- env = Env tables facts settings typeDefaults +-- :} + +-- | +-- +-- >>> print $ extractFactTable env sessionFact +-- Table fact_session_by_minute +-- Column created_at_minute_id bigint NOT NULL +-- Column publisher_id integer NOT NULL +-- Column session_count integer NOT NULL +-- Column geo_id integer NOT NULL +-- Column user_agent_id integer NOT NULL +-- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id) +-- extractFactTable :: Env -> Fact -> Table extractFactTable env = flip runReader env . E.extractFactTable +-- | +-- +-- >>> mapM_ print $ extractDimensionTables env sessionFact +-- Table dim_geo +-- Column id serial NOT NULL +-- Column country_name character varying(50) NOT NULL +-- Column city_name character varying(50) NOT NULL +-- Column continent_name character varying(15) NOT NULL +-- Column most_specific_subdivision_name character varying(100) NOT NULL +-- Column time_zone character varying(20) NOT NULL +-- PrimaryKey id +-- UniqueKey (country_name, city_name, continent_name, most_specific_subdivision_name, time_zone) +-- +-- Table dim_user_agent +-- Column id serial NOT NULL +-- Column browser_name character varying(50) NOT NULL +-- Column os character varying(50) NOT NULL +-- Column name character varying(100) NOT NULL +-- Column type character varying(15) NOT NULL +-- Column device character varying(15) NOT NULL +-- PrimaryKey id +-- UniqueKey (browser_name, os, name, type, device) +-- extractDimensionTables :: Env -> Fact -> [Table] extractDimensionTables env = flip runReader env . E.extractDimensionTables +-- | +-- +-- >>> putStrLn . ppShow $ extractDependencies env sessionFact +-- fromList +-- [ ( "dim_geo" , [ "session_events" ] ) +-- , ( "dim_user_agent" , [ "session_events" ] ) +-- , ( "fact_session_by_minute" +-- , [ "session_events" , "dim_user_agent" , "dim_geo" ] +-- ) +-- ] extractDependencies :: Env -> Fact -> Dependencies extractDependencies env = flip runReader env . E.extractDependencies +-- | +-- +-- >>> let storySessionDimTables = extractDimensionTables env sessionFact +-- >>> let sqls = map (tableDefnSQL env) storySessionDimTables +-- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls +-- create table dim_geo ( +-- id serial not null, +-- country_name character varying(50) not null, +-- city_name character varying(50) not null, +-- continent_name character varying(15) not null, +-- most_specific_subdivision_name character varying(100) not null, +-- time_zone character varying(20) not null +-- ) +-- ; +-- +-- alter table dim_geo add primary key (id); +-- +-- alter table dim_geo add unique (country_name, +-- city_name, +-- continent_name, +-- most_specific_subdivision_name, +-- time_zone); +-- +-- -------- +-- create table dim_user_agent ( +-- id serial not null, +-- browser_name character varying(50) not null, +-- os character varying(50) not null, +-- name character varying(100) not null, +-- type character varying(15) not null, +-- device character varying(15) not null +-- ) +-- ; +-- +-- alter table dim_user_agent add primary key (id); +-- +-- alter table dim_user_agent add unique (browser_name, +-- os, +-- name, +-- type, +-- device); +-- +-- -------- tableDefnSQL :: Env -> Table -> [Text] tableDefnSQL env = flip runReader env . G.tableDefnSQL +-- | +-- +-- >>> let storySessionFactTable = extractFactTable env sessionFact +-- >>> let sqls = factTableDefnSQL env sessionFact storySessionFactTable +-- >>> mapM_ (putStr . Text.unpack) sqls +-- create table fact_session_by_minute ( +-- created_at_minute_id bigint not null, +-- publisher_id integer not null, +-- session_count integer not null, +-- geo_id integer not null, +-- user_agent_id integer not null +-- ) +-- ; +-- +-- alter table fact_session_by_minute add unique (created_at_minute_id, +-- publisher_id, +-- geo_id, +-- user_agent_id); +-- +-- create index on fact_session_by_minute (created_at_minute_id) +-- ; +-- create index on fact_session_by_minute (publisher_id) +-- ; +-- create index on fact_session_by_minute (geo_id) +-- ; +-- create index on fact_session_by_minute (user_agent_id) +-- ; factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index 398a676..2abcfd4 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Ringo.Extractor ( extractDimensionTables , extractAllDimensionTables diff --git a/src/Ringo/Extractor/Internal.hs b/src/Ringo/Extractor/Internal.hs index c6af1f4..1b5c59c 100644 --- a/src/Ringo/Extractor/Internal.hs +++ b/src/Ringo/Extractor/Internal.hs @@ -1,3 +1,7 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE CPP #-} module Ringo.Extractor.Internal where import qualified Data.Map as Map diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index adf791e..5570f42 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE CPP #-} module Ringo.Generator.Create (tableDefnSQL, factTableDefnSQL) where #if MIN_VERSION_base(4,8,0) diff --git a/src/Ringo/Generator/Internal.hs b/src/Ringo/Generator/Internal.hs index fd8e77c..3b776f8 100644 --- a/src/Ringo/Generator/Internal.hs +++ b/src/Ringo/Generator/Internal.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Ringo.Generator.Internal where import qualified Data.Map as Map diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index a5cc5f7..eead3b1 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE CPP #-} module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where import qualified Data.Text as Text diff --git a/src/Ringo/Generator/Populate/Fact.hs b/src/Ringo/Generator/Populate/Fact.hs index fe3901b..75d4c7a 100644 --- a/src/Ringo/Generator/Populate/Fact.hs +++ b/src/Ringo/Generator/Populate/Fact.hs @@ -1,3 +1,8 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE QuasiQuotes #-} module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where diff --git a/src/Ringo/Generator/Sql.hs b/src/Ringo/Generator/Sql.hs index 1c0692b..2a8ed41 100644 --- a/src/Ringo/Generator/Sql.hs +++ b/src/Ringo/Generator/Sql.hs @@ -3,7 +3,6 @@ module Ringo.Generator.Sql where import qualified Data.Text as Text import qualified Data.Text.Lazy as TL - import Database.HsSqlPpp.Annotation import Database.HsSqlPpp.Dialect (postgresDialect) import Database.HsSqlPpp.Pretty diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index e2e09b6..77244d4 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -1,32 +1,59 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE BangPatterns #-} module Ringo.Types where import qualified Data.Text as Text -import Data.Map (Map) -import Data.Text (Text) +import Data.Map (Map) +import Data.Monoid ((<>)) +import Data.Text (Text) + +showColNames :: [Text] -> String +showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")" type ColumnName = Text type ColumnType = Text type TableName = Text -data Nullable = Null | NotNull deriving (Eq, Enum, Show) +data Nullable = Null | NotNull deriving (Eq, Enum) + +instance Show Nullable where + show Null = "NULL" + show NotNull = "NOT NULL" data Column = Column { columnName :: !ColumnName , columnType :: !ColumnType , columnNullable :: !Nullable - } deriving (Eq, Show) + } deriving (Eq) + +instance Show Column where + show Column {..} = "Column " + ++ Text.unpack columnName ++ " " + ++ Text.unpack columnType ++ " " + ++ show columnNullable data TableConstraint = PrimaryKey !ColumnName | UniqueKey ![ColumnName] | ForeignKey !TableName ![(ColumnName, ColumnName)] - deriving (Eq, Show) + deriving (Eq) + +instance Show TableConstraint where + show (PrimaryKey col) = "PrimaryKey " ++ Text.unpack col + show (UniqueKey cols) = "UniqueKey " ++ showColNames cols + show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " " + ++ Text.unpack tName ++ " " ++ showColNames (map snd colMap) data Table = Table { tableName :: !TableName , tableColumns :: ![Column] , tableConstraints :: ![TableConstraint] - } deriving (Eq, Show) + } deriving (Eq) + +instance Show Table where + show Table {..} = + unlines $ ("Table " ++ Text.unpack tableName) : (map show tableColumns) ++ (map show tableConstraints) data TimeUnit = Second | Minute | Hour | Day | Week deriving (Eq, Enum, Show, Read) diff --git a/src/Ringo/Utils.hs b/src/Ringo/Utils.hs index 69400d6..cc94de9 100644 --- a/src/Ringo/Utils.hs +++ b/src/Ringo/Utils.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE CPP #-} module Ringo.Utils where import qualified Control.Arrow as Arrow diff --git a/src/Ringo/Validator.hs b/src/Ringo/Validator.hs index d00bda9..e3c18e5 100644 --- a/src/Ringo/Validator.hs +++ b/src/Ringo/Validator.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE CPP #-} module Ringo.Validator ( validateTable , validateFact diff --git a/test/Spec.hs b/test/Spec.hs deleted file mode 100644 index cd4753f..0000000 --- a/test/Spec.hs +++ /dev/null @@ -1,2 +0,0 @@ -main :: IO () -main = putStrLn "Test suite not yet implemented" diff --git a/test/doctests.hs b/test/doctests.hs new file mode 100644 index 0000000..1d68649 --- /dev/null +++ b/test/doctests.hs @@ -0,0 +1,2 @@ +import Test.DocTest +main = doctest ["-isrc", "Ringo"] From 2be336ba41e5b0bb71c477bef0c11213dca4a9e7 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Fri, 1 Jan 2016 21:02:10 +0530 Subject: [PATCH 03/10] Changes travis config to run tests --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2a7a7c3..26f6fb0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ before_install: # This line does all of the work: installs GHC if necessary, build the library, # executables, and test suites, and runs the test suites. --no-terminal works # around some quirks in Travis's terminal implementation. -script: stack $ARGS --no-terminal --install-ghc install +script: stack $ARGS --no-terminal --install-ghc install --test --fast # Caching so the next build will be fast too. cache: From 4fe1006d0c440780957d676ad580525292b5e5c7 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Sat, 2 Jan 2016 02:59:24 +0530 Subject: [PATCH 04/10] Makes tests run only on ghc 7.10. - doctest is unable to resolve cabal macros on ghc 7.8 so disabling tests on ghc 7.8. --- ringo.cabal | 11 +++++++---- src/Ringo/Generator/Populate/Dimension.hs | 5 ----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ringo.cabal b/ringo.cabal index 109c8cf..e205901 100644 --- a/ringo.cabal +++ b/ringo.cabal @@ -59,10 +59,13 @@ test-suite ringo-test type: exitcode-stdio-1.0 hs-source-dirs: test, src main-is: doctests.hs - build-depends: base - , ringo - , pretty-show >=1.6 && <1.7 - , doctest >=0.9 && <0.11 + if impl(ghc >= 7.10) + build-depends: base + , ringo + , pretty-show >=1.6 && <1.7 + , doctest >=0.9 && <0.11 + else + buildable: False default-language: Haskell2010 source-repository head diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index eead3b1..03d38d5 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -5,11 +5,6 @@ module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where import qualified Data.Text as Text -#if MIN_VERSION_base(4,8,0) -#else -import Control.Applicative ((<$>)) -#endif - import Control.Monad.Reader (Reader, asks) import Data.Maybe (fromJust) import Data.Monoid ((<>)) From 6a107aaf8d8914431fa7e1ea59e255cbae513f46 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Mon, 4 Jan 2016 01:32:36 +0530 Subject: [PATCH 05/10] Changes dimension populatation generator to use hssqlppp internally. --- app/Main.hs | 2 +- src/Ringo.hs | 131 +++++++++++++++++++++- src/Ringo/Generator/Create.hs | 2 +- src/Ringo/Generator/Internal.hs | 11 +- src/Ringo/Generator/Populate/Dimension.hs | 69 +++++++----- src/Ringo/Generator/Sql.hs | 92 ++++++++++++++- 6 files changed, 265 insertions(+), 42 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 079ee28..2fa0119 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -56,7 +56,7 @@ writeFiles outputDir env@Env{..} = do dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ] factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ] - dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ tableDefnSQL env table) + dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table) | (_, tabs) <- dimTables , table <- tabs , table `notElem` envTables ] diff --git a/src/Ringo.hs b/src/Ringo.hs index 2d8a3c2..eb41d13 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -9,7 +9,7 @@ module Ringo , extractFactTable , extractDimensionTables , extractDependencies - , tableDefnSQL + , dimensionTableDefnSQL , factTableDefnSQL , dimensionTablePopulateSQL , factTablePopulateSQL @@ -149,7 +149,7 @@ extractDependencies env = flip runReader env . E.extractDependencies -- | -- -- >>> let storySessionDimTables = extractDimensionTables env sessionFact --- >>> let sqls = map (tableDefnSQL env) storySessionDimTables +-- >>> let sqls = map (dimensionTableDefnSQL env) storySessionDimTables -- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls -- create table dim_geo ( -- id serial not null, @@ -189,8 +189,8 @@ extractDependencies env = flip runReader env . E.extractDependencies -- device); -- -- -------- -tableDefnSQL :: Env -> Table -> [Text] -tableDefnSQL env = flip runReader env . G.tableDefnSQL +dimensionTableDefnSQL :: Env -> Table -> [Text] +dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL -- | -- @@ -222,6 +222,129 @@ tableDefnSQL env = flip runReader env . G.tableDefnSQL factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact +-- | +-- +-- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact +-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_geo (country_name, +-- city_name, +-- continent_name, +-- most_specific_subdivision_name, +-- time_zone) +-- select distinct +-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, +-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, +-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, +-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- from +-- session_events +-- where +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- and +-- created_at <= ? +-- ; +-- +-- insert into dim_user_agent (browser_name, os, name, type, device) +-- select distinct +-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, +-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, +-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, +-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- from +-- session_events +-- where +-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- and +-- created_at <= ? +-- ; +-- +-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_geo (country_name, +-- city_name, +-- continent_name, +-- most_specific_subdivision_name, +-- time_zone) +-- select +-- x.* +-- from +-- (select distinct +-- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, +-- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, +-- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, +-- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- from +-- session_events +-- where +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- and +-- created_at <= ? +-- and +-- created_at > ?) as x +-- left outer join +-- dim_geo +-- on dim_geo.country_name = x.geo_country_name +-- and +-- dim_geo.city_name = x.geo_city_name +-- and +-- dim_geo.continent_name = x.geo_continent_name +-- and +-- dim_geo.most_specific_subdivision_name = x.geo_most_specific_subdivision_name +-- and +-- dim_geo.time_zone = x.geo_time_zone +-- where +-- dim_geo.country_name is null and dim_geo.city_name is null +-- and +-- dim_geo.continent_name is null +-- and +-- dim_geo.most_specific_subdivision_name is null +-- and +-- dim_geo.time_zone is null +-- ; +-- +-- insert into dim_user_agent (browser_name, os, name, type, device) +-- select +-- x.* +-- from +-- (select distinct +-- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, +-- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, +-- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, +-- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- from +-- session_events +-- where +-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- and +-- created_at <= ? +-- and +-- created_at > ?) as x +-- left outer join +-- dim_user_agent +-- on dim_user_agent.browser_name = x.browser_name +-- and +-- dim_user_agent.os = x.os +-- and +-- dim_user_agent.name = x.user_agent_name +-- and +-- dim_user_agent.type = x.user_agent_type +-- and +-- dim_user_agent.device = x.user_agent_device +-- where +-- dim_user_agent.browser_name is null and dim_user_agent.os is null +-- and +-- dim_user_agent.name is null +-- and +-- dim_user_agent.type is null +-- and +-- dim_user_agent.device is null +-- ; +-- dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text dimensionTablePopulateSQL popMode env fact = flip runReader env . G.dimensionTablePopulateSQL popMode fact diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 5570f42..304b02b 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -31,7 +31,7 @@ tableDefnSQL' Table {..} = do tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing columnDefnSQL Column {..} = - att columnName columnType $ nullableDefnSQL columnNullable + attDef columnName columnType $ nullableDefnSQL columnNullable nullableDefnSQL Null = NullConstraint ea "" nullableDefnSQL NotNull = NotNullConstraint ea "" diff --git a/src/Ringo/Generator/Internal.hs b/src/Ringo/Generator/Internal.hs index 3b776f8..a2984c7 100644 --- a/src/Ringo/Generator/Internal.hs +++ b/src/Ringo/Generator/Internal.hs @@ -5,11 +5,13 @@ module Ringo.Generator.Internal where import qualified Data.Map as Map import qualified Data.Text as Text +import Database.HsSqlPpp.Syntax (ScalarExpr) import Data.List (find) import Data.Monoid ((<>)) import Data.Text (Text) import Ringo.Extractor.Internal +import Ringo.Generator.Sql import Ringo.Types joinColumnNames :: [ColumnName] -> Text @@ -25,12 +27,15 @@ dimColumnMapping dimPrefix fact dimTableName = , dimPrefix <> dName == dimTableName ] coalesceColumn :: TypeDefaults -> TableName -> Column -> Text -coalesceColumn defaults tName Column{..} = +coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName + +coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr +coalesceColumn' defaults tName Column{..} = if columnNullable == Null - then "coalesce(" <> fqColName <> ", " <> defVal columnType <> ")" + then app "coalesce" [fqColName, num $ defVal columnType] else fqColName where - fqColName = fullColumnName tName columnName + fqColName = eqi tName columnName defVal colType = maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index 03d38d5..abed632 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -3,51 +3,58 @@ {-# LANGUAGE CPP #-} module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where -import qualified Data.Text as Text +#if MIN_VERSION_base(4,8,0) +#else +import Control.Applicative ((<$>)) +#endif -import Control.Monad.Reader (Reader, asks) -import Data.Maybe (fromJust) -import Data.Monoid ((<>)) -import Data.Text (Text) +import Control.Monad.Reader (Reader, asks) +import Database.HsSqlPpp.Syntax (Statement, QueryExpr(..), Distinct(..), makeSelect, JoinType(..)) +import Data.Maybe (fromJust) +import Data.Text (Text) import Ringo.Extractor.Internal import Ringo.Generator.Internal +import Ringo.Generator.Sql import Ringo.Types dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text -dimensionTablePopulateSQL popMode fact dimTableName = do +dimensionTablePopulateSQL popMode fact dimTableName = + ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName + +dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement +dimensionTablePopulateSQL' popMode fact dimTableName = do Settings {..} <- asks envSettings tables <- asks envTables defaults <- asks envTypeDefaults let factTable = fromJust $ findTable (factTableName fact) tables colMapping = dimColumnMapping settingDimPrefix fact dimTableName - selectCols = [ coalesceColumn defaults (factTableName fact) col <> " AS " <> cName + selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col | (_, cName) <- colMapping , let col = fromJust . findColumn cName $ tableColumns factTable ] timeCol = head [ cName | DimTime cName <- factColumns fact ] - baseSelectC = "SELECT DISTINCT\n" <> joinColumnNames selectCols - <> "\nFROM " <> factTableName fact - baseWhereCs = [ "(\n" - <> Text.intercalate "\nOR " [ c <> " IS NOT NULL" | (_, c) <- colMapping ] - <> "\n)" - , timeCol <> " <= ?" - ] + isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping + selectWhereC = Just . foldBinop "and" $ + [ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++ + [ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ] + selectC = makeSelect + { selDistinct = Distinct + , selSelectList = sl selectCols + , selTref = [tref $ factTableName fact] + , selWhere = selectWhereC + } - insertC selectC whereCs = - "INSERT INTO " - <> suffixTableName popMode settingTableNameSuffixTemplate dimTableName - <> " (\n" <> joinColumnNames (map fst colMapping) <> "\n) " - <> "SELECT x.* FROM (\n" - <> selectC <> "\nWHERE " <> Text.intercalate " AND\n" whereCs - <> ") x" + iTableName = suffixTableName popMode settingTableNameSuffixTemplate dimTableName + insertC = insert iTableName (map fst colMapping) $ case popMode of + FullPopulation -> selectC + IncrementalPopulation -> let alias = "x" in + makeSelect + { selSelectList = sl [si $ qstar alias] + , selTref = + [ tjoin (subtrefa alias selectC) LeftOuter (tref dimTableName) . Just $ + foldBinop "and" [ binop "=" (eqi dimTableName c1) (eqi alias c2) | (c1, c2) <- colMapping ] ] + , selWhere = + Just . foldBinop "and" . map (postop "isnull" . eqi dimTableName . fst) $ colMapping + } - return $ case popMode of - FullPopulation -> insertC baseSelectC baseWhereCs - IncrementalPopulation -> - insertC baseSelectC (baseWhereCs ++ [ timeCol <> " > ?" ]) - <> "\nLEFT JOIN " <> dimTableName <> " ON\n" - <> Text.intercalate " \nAND " - [ fullColumnName dimTableName c1 <> " = " <> fullColumnName "x" c2 - | (c1, c2) <- colMapping ] - <> "\nWHERE " <> Text.intercalate " \nAND " - [ fullColumnName dimTableName c <> " IS NULL" | (c, _) <- colMapping ] + return insertC diff --git a/src/Ringo/Generator/Sql.hs b/src/Ringo/Generator/Sql.hs index 2a8ed41..0bd19cf 100644 --- a/src/Ringo/Generator/Sql.hs +++ b/src/Ringo/Generator/Sql.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Ringo.Generator.Sql where import qualified Data.Text as Text @@ -18,9 +19,96 @@ name n = Name ea [nmc n] nmc :: Text -> NameComponent nmc = Nmc . Text.unpack -att :: Text -> Text -> RowConstraint -> AttributeDef -att nam typ constr = +attDef :: Text -> Text -> RowConstraint -> AttributeDef +attDef nam typ constr = AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr] +member :: ScalarExpr -> ScalarExpr -> ScalarExpr +member a b = BinaryOp ea (name ".") a b + +num :: Text -> ScalarExpr +num n = NumberLit ea $ Text.unpack n + +str :: Text -> ScalarExpr +str = StringLit ea . Text.unpack + +app :: Text -> [ScalarExpr] -> ScalarExpr +app n as = App ea (name n) as + +specop :: Text -> [ScalarExpr] -> ScalarExpr +specop n as = SpecialOp ea (name n) as + +prefop :: Text -> ScalarExpr -> ScalarExpr +prefop n a = PrefixOp ea (name n) a + +postop :: Text -> ScalarExpr -> ScalarExpr +postop n a = PostfixOp ea (name n) a + +binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr +binop n a0 a1 = BinaryOp ea (name n) a0 a1 + +foldBinop :: Text -> [ScalarExpr] -> ScalarExpr +foldBinop _ [] = error "List must be non empty" +foldBinop n (a : as) = foldl (binop n) a as + +placeholder :: ScalarExpr +placeholder = Placeholder ea + +parens :: ScalarExpr -> ScalarExpr +parens = Parens ea + +qstar :: Text -> ScalarExpr +qstar = QStar ea . nmc + +-- Table ref +tref :: Text -> TableRef +tref s = Tref ea (name s) + +-- Table ref alias +trefa :: Text -> Text -> TableRef +trefa t a = TableAlias ea (nmc a) $ Tref ea (name t) + +-- Subquery Table ref alias +subtrefa :: Text -> QueryExpr -> TableRef +subtrefa a = TableAlias ea (nmc a) . SubTref ea + +-- Table join +tjoin :: TableRef -> JoinType -> TableRef -> Maybe ScalarExpr -> TableRef +tjoin ta jt tb on = JoinTref ea ta Unnatural jt Nothing tb (fmap (JoinOn ea) on) + +-- Select item +si :: ScalarExpr -> SelectItem +si = SelExp ea + +-- Select item alias +sia :: ScalarExpr -> NameComponent -> SelectItem +sia e a = SelectItem ea e a + +-- Expression qualified identifier +eqi :: Text -> Text -> ScalarExpr +eqi c x = Identifier ea $ qn c x + +-- Expression identifier +ei :: Text -> ScalarExpr +ei = Identifier ea . name + +-- Qualified name +qn :: Text -> Text -> Name +qn c n = Name ea [nmc c, nmc n] + +-- Select list +sl :: [SelectItem] -> SelectList +sl = SelectList ea + +insert :: Text -> [Text] -> QueryExpr -> Statement +insert tName cNames selectExp = + Insert ea (name tName) (map nmc cNames) selectExp Nothing + ppSQL :: Statement -> Text ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] + +ppScalarExpr :: ScalarExpr -> Text +ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect) + +ppQueryExpr :: QueryExpr -> Text +ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect) From 46e5e438564e711f3e13eba049a1cdc39c02723a Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Mon, 25 Jan 2016 22:13:47 +0530 Subject: [PATCH 06/10] Changes fact populatation generator to use hssqlppp internally. --- src/Ringo/Generator/Create.hs | 4 +- src/Ringo/Generator/Internal.hs | 19 +- src/Ringo/Generator/Populate/Dimension.hs | 8 +- src/Ringo/Generator/Populate/Fact.hs | 218 +++++++++++----------- src/Ringo/Generator/Sql.hs | 54 ++++-- 5 files changed, 149 insertions(+), 154 deletions(-) diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 0c0f460..711ad86 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -47,8 +47,8 @@ tableDefnStmts Table {..} = do tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text] tableDefnSQL table indexFn = do - ds <- map ppSQL <$> tableDefnStmts table - is <- map (\st -> ppSQL st <> ";\n") <$> indexFn table + ds <- map ppStatement <$> tableDefnStmts table + is <- map (\st -> ppStatement st <> ";\n") <$> indexFn table return $ ds ++ is dimensionTableDefnSQL :: Table -> Reader Env [Text] diff --git a/src/Ringo/Generator/Internal.hs b/src/Ringo/Generator/Internal.hs index a2984c7..245b940 100644 --- a/src/Ringo/Generator/Internal.hs +++ b/src/Ringo/Generator/Internal.hs @@ -6,31 +6,22 @@ import qualified Data.Map as Map import qualified Data.Text as Text import Database.HsSqlPpp.Syntax (ScalarExpr) -import Data.List (find) -import Data.Monoid ((<>)) -import Data.Text (Text) +import Data.List (find) +import Data.Monoid ((<>)) +import Data.Text (Text) import Ringo.Extractor.Internal import Ringo.Generator.Sql import Ringo.Types -joinColumnNames :: [ColumnName] -> Text -joinColumnNames = Text.intercalate ",\n" - -fullColumnName :: TableName -> ColumnName -> ColumnName -fullColumnName tName cName = tName <> "." <> cName - dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)] dimColumnMapping dimPrefix fact dimTableName = [ (dimColumnName dName cName, cName) | DimVal dName cName <- factColumns fact , dimPrefix <> dName == dimTableName ] -coalesceColumn :: TypeDefaults -> TableName -> Column -> Text -coalesceColumn defaults tName = ppScalarExpr . coalesceColumn' defaults tName - -coalesceColumn' :: TypeDefaults -> TableName -> Column -> ScalarExpr -coalesceColumn' defaults tName Column{..} = +coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr +coalesceColumn defaults tName Column{..} = if columnNullable == Null then app "coalesce" [fqColName, num $ defVal columnType] else fqColName diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index 08d5714..f125fc9 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -20,16 +20,16 @@ import Ringo.Types dimensionTablePopulateSQL :: TablePopulationMode -> Fact -> TableName -> Reader Env Text dimensionTablePopulateSQL popMode fact dimTableName = - ppSQL <$> dimensionTablePopulateSQL' popMode fact dimTableName + ppStatement <$> dimensionTablePopulateStmt popMode fact dimTableName -dimensionTablePopulateSQL' :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement -dimensionTablePopulateSQL' popMode fact dimTableName = do +dimensionTablePopulateStmt :: TablePopulationMode -> Fact -> TableName -> Reader Env Statement +dimensionTablePopulateStmt popMode fact dimTableName = do Settings {..} <- asks envSettings tables <- asks envTables defaults <- asks envTypeDefaults let factTable = fromJust $ findTable (factTableName fact) tables colMapping = dimColumnMapping settingDimPrefix fact dimTableName - selectCols = [ flip sia (nmc cName) $ coalesceColumn' defaults (factTableName fact) col + selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col | (_, cName) <- colMapping , let col = fromJust . findColumn cName $ tableColumns factTable ] timeCol = head [ cName | DimTime cName <- factColumns fact ] diff --git a/src/Ringo/Generator/Populate/Fact.hs b/src/Ringo/Generator/Populate/Fact.hs index 54847a6..a580e31 100644 --- a/src/Ringo/Generator/Populate/Fact.hs +++ b/src/Ringo/Generator/Populate/Fact.hs @@ -10,18 +10,21 @@ import qualified Data.Text as Text #if MIN_VERSION_base(4,8,0) #else -import Control.Applicative ((<$>)) +import Control.Applicative ((<$>)) #endif -import Control.Monad.Reader (Reader, asks) -import Data.List (nub) -import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe) -import Data.Monoid ((<>)) -import Data.Text (Text) -import Text.RawString.QQ (r) +import Control.Monad.Reader (Reader, asks) +import Database.HsSqlPpp.Syntax ( QueryExpr(..), Statement, makeSelect + , SelectList(..), SelectItem(..), JoinType(..) ) +import Data.List (nub) +import Data.Maybe (fromJust, fromMaybe, listToMaybe) +import Data.Monoid ((<>)) +import Data.Text (Text) +import Text.RawString.QQ (r) import Ringo.Extractor.Internal import Ringo.Generator.Internal +import Ringo.Generator.Sql import Ringo.Types import Ringo.Utils @@ -48,18 +51,12 @@ BEGIN RETURN r; END; $$ -LANGUAGE 'plpgsql' IMMUTABLE|] +LANGUAGE 'plpgsql' IMMUTABLE; +|] -data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL - { ftpsSelectCols :: ![(Text, Text)] - , ftpsSelectTable :: !Text - , ftpsJoinClauses :: ![Text] - , ftpsWhereClauses :: ![Text] - , ftpsGroupByCols :: ![Text] - } deriving (Show, Eq) - -factTableUpdateSQL :: TablePopulationMode -> Fact -> Text -> FactTablePopulateSelectSQL -> Reader Env [Text] -factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopulateSelectSQL {..} = do +factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement] +factCountDistinctUpdateStmts + popMode fact groupByColPrefix ~Select {selSelectList = SelectList _ origSelectItems, ..} = do Settings {..} <- asks envSettings tables <- asks envTables let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact] @@ -70,50 +67,57 @@ factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL@FactTablePopu suffixTableName popMode settingTableNameSuffixTemplate $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit - return . (\xs -> if null xs then xs else ilog2FunctionString : xs) - $ for countDistinctCols $ \(FactCountDistinct scName cName) -> - let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text" + return $ for countDistinctCols $ \(FactCountDistinct scName cName) -> + let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text" bucketSelectCols = - [ ( "hashtext(" <> unqCol <> ") & " - <> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1) - , cName <> "_bnum" - ) - , ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))" - , cName <> "_bhash" - ) + [ sia (binop "&" (app "hashtext" [ unqCol ]) + (num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1)) + (nmc $ cName <> "_bnum") + , sia (binop "-" + (num "31") + (app "ilog2" + [ app "min" [ binop "&" + (app "hashtext" [ unqCol ]) + (prefop "~" (parens (binop "<<" (num "1") (num "31"))))]])) + (nmc $ cName <> "_bhash") ] - selectSQL = toSelectSQL $ - populateSelectSQL - { ftpsSelectCols = filter ((`elem` ftpsGroupByCols) . snd) ftpsSelectCols ++ bucketSelectCols - , ftpsGroupByCols = ftpsGroupByCols ++ [ cName <> "_bnum" ] - , ftpsWhereClauses = ftpsWhereClauses ++ [ unqCol <> " IS NOT NULL" ] + groupByCols = map ppScalarExpr selGroupBy + selectList = + [ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ] + + selectStmt = + makeSelect + { selSelectList = sl $ selectList ++ bucketSelectCols + , selTref = selTref + , selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere + , selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ] } aggSelectClause = - "json_object_agg(" <> cName <> "_bnum, " <> cName <> "_bhash) AS " <> cName + sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName) - in "UPDATE " <> extFactTableName - <> "\nSET " <> cName <> " = " <> fullColumnName "xyz" cName - <> "\nFROM (" - <> "\nSELECT " <> joinColumnNames (ftpsGroupByCols ++ [aggSelectClause]) - <> "\nFROM (\n" <> selectSQL <> "\n) zyx" - <> "\nGROUP BY \n" <> joinColumnNames ftpsGroupByCols - <> "\n) xyz" - <> "\n WHERE\n" - <> Text.intercalate "\nAND " - [ fullColumnName extFactTableName .fromJust . Text.stripPrefix groupByColPrefix $ col - <> " = " <> fullColumnName "xyz" col - | col <- ftpsGroupByCols ] + in update extFactTableName + [ (cName, eqi "xyz" cName) ] + [ subtrefa "xyz" + makeSelect + { selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ] + , selTref = [ subtrefa "zyx" selectStmt ] + , selGroupBy = selGroupBy + } ] $ + foldBinop "and" + [ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col) + (eqi "xyz" col) + | col <- groupByCols ] where bucketCount :: Double -> Integer bucketCount errorRate = let power :: Double = fromIntegral (ceiling . logBase 2 $ (1.04 / errorRate) ** 2 :: Integer) in ceiling $ 2 ** power -factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text] -factTablePopulateSQL popMode fact = do +factTablePopulateStmts :: TablePopulationMode -> Fact -> Reader Env [Statement] +factTablePopulateStmts popMode fact = do Settings {..} <- asks envSettings allDims <- extractAllDimensionTables fact tables <- asks envTables @@ -122,116 +126,102 @@ factTablePopulateSQL popMode fact = do fTable = fromJust . findTable fTableName $ tables dimIdColName = settingDimTableIdColumnName - coalesceFKId col = - if "coalesce" `Text.isPrefixOf` col - then col - else "coalesce((" <> col <> "), " <> Text.pack (show settingForeignKeyIdCoalesceValue) <> ")" + coalesceFKId ex = + app "coalesce" [ ex, num . Text.pack . show $ settingForeignKeyIdCoalesceValue ] timeUnitColumnInsertSQL cName = let colName = timeUnitColumnName dimIdColName cName settingTimeUnit in ( colName - , "floor(extract(epoch from " <> fullColumnName fTableName cName <> ")/" - <> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")::bigint" + , cast (app "floor" [ binop "/" (extEpoch (eqi fTableName cName)) + (num . Text.pack . show . timeUnitToSeconds $ settingTimeUnit) ]) + "bigint" , True ) dimIdColumnInsertSQL cName = let sCol = fromJust . findColumn cName $ tableColumns fTable in (cName, coalesceColumn defaults fTableName sCol, True) + app' f cName = app f [ eqi fTableName cName ] + factColMap = concatFor (factColumns fact) $ \col -> case col of DimTime cName -> [ timeUnitColumnInsertSQL cName ] NoDimId cName -> [ dimIdColumnInsertSQL cName ] TenantId cName -> [ dimIdColumnInsertSQL cName ] FactCount scName cName -> - [ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ] - FactSum scName cName -> - [ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ] - FactMax scName cName -> - [ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ] - FactMin scName cName -> - [ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ] + [ (cName, app "count" [ maybe star (eqi fTableName) scName ], False) ] + FactSum scName cName -> [ (cName, app' "sum" scName, False) ] + FactMax scName cName -> [ (cName, app' "max" scName, False) ] + FactMin scName cName -> [ (cName, app' "min" scName, False) ] FactAverage scName cName -> - [ ( cName <> settingAvgCountColumSuffix - , "count(" <> fullColumnName fTableName scName <> ")" - , False - ) - , ( cName <> settingAvgSumColumnSuffix - , "sum(" <> fullColumnName fTableName scName <> ")" - , False - ) + [ ( cName <> settingAvgCountColumSuffix, app' "count" scName, False ) + , ( cName <> settingAvgSumColumnSuffix , app' "sum" scName , False) ] - FactCountDistinct _ cName -> [ (cName, "'{}'::json", False)] - _ -> [] + FactCountDistinct _ cName -> [ (cName, cast (str "{}") "json", False) ] + _ -> [] dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName factSourceTableName = factTableName dimFact factSourceTable = fromJust . findTable factSourceTableName $ tables dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable - dimLookupWhereClauses = - [ fullColumnName tableName dimColName <> " = " <> coalesceColumn defaults factSourceTableName sourceCol + dimLookupWhereClauses = Just . foldBinop "and" $ + [ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol) | (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName , let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ] - insertSQL = if factTable `elem` tables -- existing dimension table + insertExpr = if factTable `elem` tables -- existing dimension table then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id) - $ fullColumnName factSourceTableName dimFKIdColName - else "SELECT " <> dimIdColName <> " FROM " - <> suffixTableName popMode settingTableNameSuffixTemplate tableName <> " " <> tableName - <> "\nWHERE " <> Text.intercalate "\n AND " dimLookupWhereClauses - in (dimFKIdColName, coalesceFKId insertSQL, True) + $ eqi factSourceTableName dimFKIdColName + else coalesceFKId . subQueryExp $ + makeSelect + { selSelectList = sl [ si $ ei dimIdColName ] + , selTref = + [ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ] + , selWhere = dimLookupWhereClauses + } + in (dimFKIdColName, insertExpr, True) - colMap = [ (cName, (sql, groupByColPrefix <> cName), addToGroupBy) - | (cName, sql, addToGroupBy) <- factColMap ++ dimColMap ] + colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy) + | (cName, expr, addToGroupBy) <- factColMap ++ dimColMap ] joinClauses = - mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> "\nON "<> p) <$> joinClausePreds fTable tName) + map (tref &&& joinClausePreds fTable) + . filter (/= fTableName) . nub . map (factTableName . fst) $ allDims - timeCol = fullColumnName fTableName $ head [ cName | DimTime cName <- factColumns fact ] + timeCol = eqi fTableName $ head [ cName | DimTime cName <- factColumns fact ] extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit - populateSelectSQL = - FactTablePopulateSelectSQL - { ftpsSelectCols = map snd3 colMap - , ftpsSelectTable = fTableName - , ftpsJoinClauses = joinClauses - , ftpsWhereClauses = - timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ] - , ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap + populateSelectExpr = + makeSelect + { selSelectList = sl . map (uncurry sia . snd3) $ colMap + , selTref = [ foldl (\tf (t, oc) -> tjoin tf LeftOuter t oc) (tref fTableName) joinClauses ] + , selWhere = Just . foldBinop "and" $ + binop "<" timeCol placeholder : + [ binop ">=" timeCol placeholder | popMode == IncrementalPopulation ] + , selGroupBy = map (ei . (groupByColPrefix <>) . fst3) . filter thd3 $ colMap } - insertIntoSQL = "INSERT INTO " <> extFactTableName - <> " (\n" <> Text.intercalate ",\n " (map fst3 colMap) <> "\n)\n" - <> toSelectSQL populateSelectSQL + insertIntoStmt = insert extFactTableName (map fst3 colMap) populateSelectExpr - updateSQLs <- factTableUpdateSQL popMode fact groupByColPrefix populateSelectSQL - - return $ insertIntoSQL : updateSQLs + updateStmts <- factCountDistinctUpdateStmts popMode fact groupByColPrefix populateSelectExpr + return $ insertIntoStmt : updateStmts where groupByColPrefix = "xxff_" joinClausePreds table oTableName = - Text.intercalate " AND " - . map (\(c1, c2) -> fullColumnName (tableName table) c1 <> " = " <> fullColumnName oTableName c2) + foldBinop "and" + . map (\(c1, c2) -> binop "=" (eqi (tableName table) c1) (eqi oTableName c2)) <$> listToMaybe [ colPairs | ForeignKey tName colPairs <- tableConstraints table , tName == oTableName ] -toSelectSQL :: FactTablePopulateSelectSQL -> Text -toSelectSQL FactTablePopulateSelectSQL {..} = - "SELECT \n" <> joinColumnNames (map (uncurry asName) ftpsSelectCols) - <> "\nFROM " <> ftpsSelectTable - <> (if not . null $ ftpsJoinClauses - then "\n" <> Text.intercalate "\n" ftpsJoinClauses - else "") - <> (if not . null $ ftpsWhereClauses - then "\nWHERE " <> Text.intercalate "\nAND " ftpsWhereClauses - else "") - <> "\nGROUP BY \n" - <> joinColumnNames ftpsGroupByCols - where - asName sql alias = "(" <> sql <> ")" <> " as " <> alias - +factTablePopulateSQL :: TablePopulationMode -> Fact -> Reader Env [Text] +factTablePopulateSQL popMode fact = do + stmts <- factTablePopulateStmts popMode fact + return $ case stmts of + [] -> [] + [i] -> [ ppStatement i ] + i:us -> [ ppStatement i, ilog2FunctionString ] ++ map ppStatement us diff --git a/src/Ringo/Generator/Sql.hs b/src/Ringo/Generator/Sql.hs index 0bd19cf..c168244 100644 --- a/src/Ringo/Generator/Sql.hs +++ b/src/Ringo/Generator/Sql.hs @@ -1,14 +1,14 @@ {-# LANGUAGE OverloadedStrings #-} module Ringo.Generator.Sql where -import qualified Data.Text as Text +import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import Database.HsSqlPpp.Annotation -import Database.HsSqlPpp.Dialect (postgresDialect) +import Database.HsSqlPpp.Dialect import Database.HsSqlPpp.Pretty import Database.HsSqlPpp.Syntax -import Data.Text (Text) +import Data.Text (Text) ea :: Annotation ea = emptyAnnotation @@ -24,28 +24,31 @@ attDef nam typ constr = AttributeDef ea (nmc nam) (SimpleTypeName ea $ name typ) Nothing [constr] member :: ScalarExpr -> ScalarExpr -> ScalarExpr -member a b = BinaryOp ea (name ".") a b +member = BinaryOp ea (name ".") num :: Text -> ScalarExpr -num n = NumberLit ea $ Text.unpack n +num = NumberLit ea . Text.unpack str :: Text -> ScalarExpr str = StringLit ea . Text.unpack -app :: Text -> [ScalarExpr] -> ScalarExpr -app n as = App ea (name n) as +extEpoch :: ScalarExpr -> ScalarExpr +extEpoch = Extract ea ExtractEpoch -specop :: Text -> [ScalarExpr] -> ScalarExpr -specop n as = SpecialOp ea (name n) as +app :: Text -> [ScalarExpr] -> ScalarExpr +app n = App ea (name n) + +cast :: ScalarExpr -> Text -> ScalarExpr +cast ex = Cast ea ex . SimpleTypeName ea . name prefop :: Text -> ScalarExpr -> ScalarExpr -prefop n a = PrefixOp ea (name n) a +prefop n = PrefixOp ea (name n) postop :: Text -> ScalarExpr -> ScalarExpr -postop n a = PostfixOp ea (name n) a +postop n = PostfixOp ea (name n) binop :: Text -> ScalarExpr -> ScalarExpr -> ScalarExpr -binop n a0 a1 = BinaryOp ea (name n) a0 a1 +binop n = BinaryOp ea (name n) foldBinop :: Text -> [ScalarExpr] -> ScalarExpr foldBinop _ [] = error "List must be non empty" @@ -60,9 +63,15 @@ parens = Parens ea qstar :: Text -> ScalarExpr qstar = QStar ea . nmc +star :: ScalarExpr +star = Star ea + +subQueryExp :: QueryExpr -> ScalarExpr +subQueryExp = ScalarSubQuery ea + -- Table ref tref :: Text -> TableRef -tref s = Tref ea (name s) +tref = Tref ea . name -- Table ref alias trefa :: Text -> Text -> TableRef @@ -82,11 +91,11 @@ si = SelExp ea -- Select item alias sia :: ScalarExpr -> NameComponent -> SelectItem -sia e a = SelectItem ea e a +sia = SelectItem ea -- Expression qualified identifier eqi :: Text -> Text -> ScalarExpr -eqi c x = Identifier ea $ qn c x +eqi c = Identifier ea . qn c -- Expression identifier ei :: Text -> ScalarExpr @@ -100,15 +109,20 @@ qn c n = Name ea [nmc c, nmc n] sl :: [SelectItem] -> SelectList sl = SelectList ea +-- Insert statement insert :: Text -> [Text] -> QueryExpr -> Statement insert tName cNames selectExp = Insert ea (name tName) (map nmc cNames) selectExp Nothing -ppSQL :: Statement -> Text -ppSQL st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] +-- Update statement +update :: Text -> [(Text, ScalarExpr)] -> [TableRef] -> ScalarExpr -> Statement +update tName setClauseList fromList whr = + Update ea (name tName) (map (uncurry (SetClause ea . nmc)) setClauseList) fromList (Just whr) Nothing +-- Pretty print statement +ppStatement :: Statement -> Text +ppStatement st = TL.toStrict $ prettyStatements (PrettyFlags postgresDialect) [st] + +-- Pretty print scalar expression ppScalarExpr :: ScalarExpr -> Text ppScalarExpr = TL.toStrict . prettyScalarExpr (PrettyFlags postgresDialect) - -ppQueryExpr :: QueryExpr -> Text -ppQueryExpr = TL.toStrict . prettyQueryExpr (PrettyFlags postgresDialect) From 68bf32a673146cacffcc1d0b5dd2a3a82ecbc6b7 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Mon, 25 Jan 2016 22:27:38 +0530 Subject: [PATCH 07/10] Adds doctests for factTablePopulateSQL and validate* functions. --- src/Ringo.hs | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/src/Ringo.hs b/src/Ringo.hs index 2035c53..da8a2de 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -36,7 +36,7 @@ import qualified Ringo.Validator as V -- Table { tableName = "session_events" -- , tableColumns = -- [ Column "id" "uuid" NotNull --- , Column "created_at" "timestamp without time zone" Null +-- , Column "created_at" "timestamp without time zone" NotNull -- , Column "member_id" "integer" Null -- , Column "publisher_id" "integer" NotNull -- , Column "user_agent" "character varying(1024)" Null @@ -369,12 +369,122 @@ dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> dimensionTablePopulateSQL popMode env fact = flip runReader env . G.dimensionTablePopulateSQL popMode fact +-- | +-- +-- >>> let sqls = factTablePopulateSQL FullPopulation env sessionFact +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into fact_session_by_minute (created_at_minute_id, +-- publisher_id, +-- session_count, +-- geo_id, +-- user_agent_id) +-- select +-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, +-- session_events.publisher_id as xxff_publisher_id, +-- count(*) as xxff_session_count, +-- coalesce((select +-- id +-- from +-- dim_geo as dim_geo +-- where +-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.most_specific_subdivision_name = coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.time_zone = coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, +-- coalesce((select +-- id +-- from +-- dim_user_agent as dim_user_agent +-- where +-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.type = coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.device = coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id +-- from +-- session_events +-- where +-- session_events.created_at < ? +-- group by +-- xxff_created_at_minute_id, +-- xxff_publisher_id, +-- xxff_geo_id, +-- xxff_user_agent_id +-- ; +-- +-- >>> let sqls = factTablePopulateSQL IncrementalPopulation env sessionFact +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into fact_session_by_minute (created_at_minute_id, +-- publisher_id, +-- session_count, +-- geo_id, +-- user_agent_id) +-- select +-- cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, +-- session_events.publisher_id as xxff_publisher_id, +-- count(*) as xxff_session_count, +-- coalesce((select +-- id +-- from +-- dim_geo as dim_geo +-- where +-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.most_specific_subdivision_name = coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.time_zone = coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, +-- coalesce((select +-- id +-- from +-- dim_user_agent as dim_user_agent +-- where +-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.type = coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.device = coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id +-- from +-- session_events +-- where +-- session_events.created_at < ? and session_events.created_at >= ? +-- group by +-- xxff_created_at_minute_id, +-- xxff_publisher_id, +-- xxff_geo_id, +-- xxff_user_agent_id +-- ; +-- factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text] factTablePopulateSQL popMode env = flip runReader env . G.factTablePopulateSQL popMode +-- | +-- +-- >>> validateTable env sessionEventsTable +-- [] validateTable :: Env -> Table -> [ValidationError] validateTable env = flip runReader env . V.validateTable +-- | +-- +-- >>> validateFact env sessionFact +-- [] validateFact :: Env -> Fact -> [ValidationError] validateFact env = flip runReader env . V.validateFact From 04be3b69f19fe7b6380d656e8547177ea97f6ea1 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Tue, 26 Jan 2016 01:34:09 +0530 Subject: [PATCH 08/10] Adds more doctests. - Found and fixed a bug regarding column names of FKs to existing dimension tables (from DimId fact columns). --- src/Ringo.hs | 344 +++++++++++++++++++-------- src/Ringo/Extractor.hs | 6 +- src/Ringo/Extractor/Internal.hs | 8 +- src/Ringo/Generator/Create.hs | 5 +- src/Ringo/Generator/Populate/Fact.hs | 17 +- 5 files changed, 263 insertions(+), 117 deletions(-) diff --git a/src/Ringo.hs b/src/Ringo.hs index da8a2de..efa4003 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -30,6 +30,7 @@ import qualified Ringo.Validator as V -- >>> import Ringo -- >>> import qualified Data.Map as Map -- >>> import qualified Data.Text as Text +-- >>> import Data.List (nub) -- >>> import Text.Show.Pretty -- >>> :{ --let sessionEventsTable = @@ -68,18 +69,63 @@ import qualified Ringo.Validator as V -- , DimVal "user_agent" "browser_name" -- , DimVal "user_agent" "os" -- , DimVal "user_agent" "user_agent_name" --- , DimVal "user_agent" "user_agent_type" --- , DimVal "user_agent" "user_agent_device" -- , DimVal "geo" "geo_country_name" -- , DimVal "geo" "geo_city_name" -- , DimVal "geo" "geo_continent_name" --- , DimVal "geo" "geo_most_specific_subdivision_name" --- , DimVal "geo" "geo_time_zone" -- , FactCount Nothing "session_count" -- ] -- } --- tables = [sessionEventsTable] --- facts = [sessionFact] +-- pageViewEventsTable = +-- Table { tableName = "page_view_events" +-- , tableColumns = +-- [ Column "id" "uuid" NotNull +-- , Column "created_at" "timestamp without time zone" NotNull +-- , Column "member_id" "integer" Null +-- , Column "publisher_id" "integer" NotNull +-- , Column "device_tracker_id" "uuid" Null +-- , Column "session_event_id" "uuid" Null +-- , Column "page_type" "character varying(20)" NotNull +-- , Column "referrer" "character varying(1024)" Null +-- , Column "url" "character varying(1024)" Null +-- , Column "referrer_id" "integer" Null +-- ] +-- , tableConstraints = +-- [ PrimaryKey "id" +-- , ForeignKey "session_events" [ ("session_event_id", "id") +-- , ("publisher_id", "publisher_id") +-- ] +-- , ForeignKey "referrers" [ ("referrer_id", "id") +-- , ("publisher_id", "publisher_id") +-- ] +-- ] +-- } +-- pageViewFact = +-- Fact { factName = "page_view" +-- , factTableName = "page_view_events" +-- , factTablePersistent = True +-- , factParentNames = [ "session" ] +-- , factColumns = +-- [ DimTime "created_at" +-- , NoDimId "publisher_id" +-- , DimVal "page_type" "page_type" +-- , DimId "referrers" "referrer_id" +-- , FactCount Nothing "view_count" +-- ] +-- } +-- referrersTable = +-- Table { tableName = "referrers" +-- , tableColumns = +-- [ Column "id" "integer" NotNull +-- , Column "publisher_id" "integer" NotNull +-- , Column "name" "character varying(100)" NotNull +-- ] +-- , tableConstraints = +-- [ PrimaryKey "id" +-- , UniqueKey ["publisher_id", "name"] +-- ] +-- } +-- tables = [sessionEventsTable, pageViewEventsTable, referrersTable] +-- facts = [sessionFact, pageViewFact] -- typeDefaults = Map.fromList [ ("integer", "-1") -- , ("timestamp", "'00-00-00 00:00:00'") -- , ("character", "'__UNKNOWN_VAL__'") @@ -104,6 +150,17 @@ import qualified Ringo.Validator as V -- Column user_agent_id integer NOT NULL -- UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id) -- +-- >>> print $ extractFactTable env pageViewFact +-- Table fact_page_view_by_minute +-- Column created_at_minute_id bigint NOT NULL +-- Column publisher_id integer NOT NULL +-- Column view_count integer NOT NULL +-- Column referrer_id integer NOT NULL +-- Column page_type_id integer NOT NULL +-- Column geo_id integer NOT NULL +-- Column user_agent_id integer NOT NULL +-- UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id) +-- extractFactTable :: Env -> Fact -> Table extractFactTable env = flip runReader env . E.extractFactTable @@ -115,20 +172,23 @@ extractFactTable env = flip runReader env . E.extractFactTable -- Column country_name character varying(50) NOT NULL -- Column city_name character varying(50) NOT NULL -- Column continent_name character varying(15) NOT NULL --- Column most_specific_subdivision_name character varying(100) NOT NULL --- Column time_zone character varying(20) NOT NULL -- PrimaryKey id --- UniqueKey (country_name, city_name, continent_name, most_specific_subdivision_name, time_zone) +-- UniqueKey (country_name, city_name, continent_name) -- -- Table dim_user_agent -- Column id serial NOT NULL -- Column browser_name character varying(50) NOT NULL -- Column os character varying(50) NOT NULL -- Column name character varying(100) NOT NULL --- Column type character varying(15) NOT NULL --- Column device character varying(15) NOT NULL -- PrimaryKey id --- UniqueKey (browser_name, os, name, type, device) +-- UniqueKey (browser_name, os, name) +-- +-- >>> mapM_ print . filter (`notElem` tables) $ extractDimensionTables env pageViewFact +-- Table dim_page_type +-- Column id serial NOT NULL +-- Column page_type character varying(20) NOT NULL +-- PrimaryKey id +-- UniqueKey (page_type) -- extractDimensionTables :: Env -> Fact -> [Table] extractDimensionTables env = flip runReader env . E.extractDimensionTables @@ -143,21 +203,32 @@ extractDimensionTables env = flip runReader env . E.extractDimensionTables -- , [ "session_events" , "dim_user_agent" , "dim_geo" ] -- ) -- ] +-- >>> putStrLn . ppShow $ extractDependencies env pageViewFact +-- fromList +-- [ ( "dim_page_type" , [ "page_view_events" ] ) +-- , ( "fact_page_view_by_minute" +-- , [ "page_view_events" +-- , "session_events" +-- , "dim_page_type" +-- , "referrers" +-- , "dim_user_agent" +-- , "dim_geo" +-- ] +-- ) +-- ] extractDependencies :: Env -> Fact -> Dependencies extractDependencies env = flip runReader env . E.extractDependencies -- | -- --- >>> let storySessionDimTables = extractDimensionTables env sessionFact --- >>> let sqls = map (dimensionTableDefnSQL env) storySessionDimTables +-- >>> let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables env) $ facts +-- >>> let sqls = map (dimensionTableDefnSQL env) dimTables -- >>> mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls -- create table dim_geo ( -- id serial not null, -- country_name character varying(50) not null, -- city_name character varying(50) not null, --- continent_name character varying(15) not null, --- most_specific_subdivision_name character varying(100) not null, --- time_zone character varying(20) not null +-- continent_name character varying(15) not null -- ) -- ; -- @@ -165,9 +236,7 @@ extractDependencies env = flip runReader env . E.extractDependencies -- -- alter table dim_geo add unique (country_name, -- city_name, --- continent_name, --- most_specific_subdivision_name, --- time_zone); +-- continent_name); -- -- create index on dim_geo (country_name) -- ; @@ -175,28 +244,18 @@ extractDependencies env = flip runReader env . E.extractDependencies -- ; -- create index on dim_geo (continent_name) -- ; --- create index on dim_geo (most_specific_subdivision_name) --- ; --- create index on dim_geo (time_zone) --- ; -- -------- -- create table dim_user_agent ( -- id serial not null, -- browser_name character varying(50) not null, -- os character varying(50) not null, --- name character varying(100) not null, --- type character varying(15) not null, --- device character varying(15) not null +-- name character varying(100) not null -- ) -- ; -- -- alter table dim_user_agent add primary key (id); -- --- alter table dim_user_agent add unique (browser_name, --- os, --- name, --- type, --- device); +-- alter table dim_user_agent add unique (browser_name, os, name); -- -- create index on dim_user_agent (browser_name) -- ; @@ -204,10 +263,17 @@ extractDependencies env = flip runReader env . E.extractDependencies -- ; -- create index on dim_user_agent (name) -- ; --- create index on dim_user_agent (type) --- ; --- create index on dim_user_agent (device) +-- -------- +-- create table dim_page_type ( +-- id serial not null, +-- page_type character varying(20) not null +-- ) -- ; +-- +-- alter table dim_page_type add primary key (id); +-- +-- alter table dim_page_type add unique (page_type); +-- -- -------- dimensionTableDefnSQL :: Env -> Table -> [Text] dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL @@ -239,6 +305,39 @@ dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL -- ; -- create index on fact_session_by_minute (user_agent_id) -- ; +-- >>> let pageViewFactTable = extractFactTable env pageViewFact +-- >>> let sqls = factTableDefnSQL env pageViewFact pageViewFactTable +-- >>> mapM_ (putStr . Text.unpack) sqls +-- create table fact_page_view_by_minute ( +-- created_at_minute_id bigint not null, +-- publisher_id integer not null, +-- view_count integer not null, +-- referrer_id integer not null, +-- page_type_id integer not null, +-- geo_id integer not null, +-- user_agent_id integer not null +-- ) +-- ; +-- +-- alter table fact_page_view_by_minute add unique (created_at_minute_id, +-- publisher_id, +-- referrer_id, +-- page_type_id, +-- geo_id, +-- user_agent_id); +-- +-- create index on fact_page_view_by_minute (created_at_minute_id) +-- ; +-- create index on fact_page_view_by_minute (publisher_id) +-- ; +-- create index on fact_page_view_by_minute (referrer_id) +-- ; +-- create index on fact_page_view_by_minute (page_type_id) +-- ; +-- create index on fact_page_view_by_minute (geo_id) +-- ; +-- create index on fact_page_view_by_minute (user_agent_id) +-- ; factTableDefnSQL :: Env -> Fact -> Table -> [Text] factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact @@ -247,60 +346,46 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- >>> let storySessionDimTableNames = map tableName $ extractDimensionTables env sessionFact -- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env sessionFact) storySessionDimTableNames -- >>> mapM_ (putStr . Text.unpack) sqls --- insert into dim_geo (country_name, --- city_name, --- continent_name, --- most_specific_subdivision_name, --- time_zone) +-- insert into dim_geo (country_name, city_name, continent_name) -- select distinct -- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, -- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, --- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, --- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, --- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name -- from -- session_events -- where --- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) -- and -- created_at < ? -- ; -- --- insert into dim_user_agent (browser_name, os, name, type, device) +-- insert into dim_user_agent (browser_name, os, name) -- select distinct -- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, -- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, --- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, --- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, --- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name -- from -- session_events -- where --- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- (browser_name is not null or os is not null or user_agent_name is not null) -- and -- created_at < ? -- ; -- -- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames -- >>> mapM_ (putStr . Text.unpack) sqls --- insert into dim_geo (country_name, --- city_name, --- continent_name, --- most_specific_subdivision_name, --- time_zone) +-- insert into dim_geo (country_name, city_name, continent_name) -- select -- x.* -- from -- (select distinct -- coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, -- coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, --- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name, --- coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') as geo_most_specific_subdivision_name, --- coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__') as geo_time_zone +-- coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name -- from -- session_events -- where --- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) +-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) -- and -- created_at < ? -- and @@ -312,34 +397,24 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- dim_geo.city_name = x.geo_city_name -- and -- dim_geo.continent_name = x.geo_continent_name --- and --- dim_geo.most_specific_subdivision_name = x.geo_most_specific_subdivision_name --- and --- dim_geo.time_zone = x.geo_time_zone -- where -- dim_geo.country_name is null and dim_geo.city_name is null -- and -- dim_geo.continent_name is null --- and --- dim_geo.most_specific_subdivision_name is null --- and --- dim_geo.time_zone is null -- ; -- --- insert into dim_user_agent (browser_name, os, name, type, device) +-- insert into dim_user_agent (browser_name, os, name) -- select -- x.* -- from -- (select distinct -- coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, -- coalesce(session_events.os,'__UNKNOWN_VAL__') as os, --- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name, --- coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') as user_agent_type, --- coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__') as user_agent_device +-- coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name -- from -- session_events -- where --- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) +-- (browser_name is not null or os is not null or user_agent_name is not null) -- and -- created_at < ? -- and @@ -351,18 +426,43 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- dim_user_agent.os = x.os -- and -- dim_user_agent.name = x.user_agent_name --- and --- dim_user_agent.type = x.user_agent_type --- and --- dim_user_agent.device = x.user_agent_device -- where -- dim_user_agent.browser_name is null and dim_user_agent.os is null -- and -- dim_user_agent.name is null --- and --- dim_user_agent.type is null --- and --- dim_user_agent.device is null +-- ; +-- +-- >>> let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables env pageViewFact +-- >>> let sqls = map (dimensionTablePopulateSQL FullPopulation env pageViewFact) pageViewDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_page_type (page_type) +-- select distinct +-- page_view_events.page_type as page_type +-- from +-- page_view_events +-- where +-- (page_type is not null) and created_at < ? +-- ; +-- +-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env pageViewFact) pageViewDimTableNames +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into dim_page_type (page_type) +-- select +-- x.* +-- from +-- (select distinct +-- page_view_events.page_type as page_type +-- from +-- page_view_events +-- where +-- (page_type is not null) and created_at < ? +-- and +-- created_at >= ?) as x +-- left outer join +-- dim_page_type +-- on dim_page_type.page_type = x.page_type +-- where +-- dim_page_type.page_type is null -- ; -- dimensionTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> TableName -> Text @@ -391,11 +491,7 @@ dimensionTablePopulateSQL popMode env fact = -- and -- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') -- and --- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') --- and --- dim_geo.most_specific_subdivision_name = coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') --- and --- dim_geo.time_zone = coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, +-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, -- coalesce((select -- id -- from @@ -405,11 +501,7 @@ dimensionTablePopulateSQL popMode env fact = -- and -- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') -- and --- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') --- and --- dim_user_agent.type = coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') --- and --- dim_user_agent.device = coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id +-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id -- from -- session_events -- where @@ -441,11 +533,7 @@ dimensionTablePopulateSQL popMode env fact = -- and -- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') -- and --- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') --- and --- dim_geo.most_specific_subdivision_name = coalesce(session_events.geo_most_specific_subdivision_name,'__UNKNOWN_VAL__') --- and --- dim_geo.time_zone = coalesce(session_events.geo_time_zone,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, +-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, -- coalesce((select -- id -- from @@ -455,11 +543,7 @@ dimensionTablePopulateSQL popMode env fact = -- and -- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') -- and --- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') --- and --- dim_user_agent.type = coalesce(session_events.user_agent_type,'__UNKNOWN_VAL__') --- and --- dim_user_agent.device = coalesce(session_events.user_agent_device,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id +-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id -- from -- session_events -- where @@ -471,20 +555,78 @@ dimensionTablePopulateSQL popMode env fact = -- xxff_user_agent_id -- ; -- +-- >>> let sqls = factTablePopulateSQL FullPopulation env pageViewFact +-- >>> mapM_ (putStr . Text.unpack) sqls +-- insert into fact_page_view_by_minute (created_at_minute_id, +-- publisher_id, +-- view_count, +-- referrer_id, +-- page_type_id, +-- geo_id, +-- user_agent_id) +-- select +-- cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, +-- page_view_events.publisher_id as xxff_publisher_id, +-- count(*) as xxff_view_count, +-- coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id, +-- coalesce((select +-- id +-- from +-- dim_page_type as dim_page_type +-- where +-- dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id, +-- coalesce((select +-- id +-- from +-- dim_geo as dim_geo +-- where +-- dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') +-- and +-- dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, +-- coalesce((select +-- id +-- from +-- dim_user_agent as dim_user_agent +-- where +-- dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') +-- and +-- dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id +-- from +-- page_view_events +-- left outer join +-- session_events +-- on page_view_events.session_event_id = session_events.id +-- and +-- page_view_events.publisher_id = session_events.publisher_id +-- where +-- page_view_events.created_at < ? +-- group by +-- xxff_created_at_minute_id, +-- xxff_publisher_id, +-- xxff_referrer_id, +-- xxff_page_type_id, +-- xxff_geo_id, +-- xxff_user_agent_id +-- ; +-- factTablePopulateSQL :: TablePopulationMode -> Env -> Fact -> [Text] factTablePopulateSQL popMode env = flip runReader env . G.factTablePopulateSQL popMode -- | -- --- >>> validateTable env sessionEventsTable +-- >>> concatMap (validateTable env) tables -- [] validateTable :: Env -> Table -> [ValidationError] validateTable env = flip runReader env . V.validateTable -- | -- --- >>> validateFact env sessionFact +-- >>> concatMap (validateFact env) facts -- [] validateFact :: Env -> Fact -> [ValidationError] validateFact env = flip runReader env . V.validateFact diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index 6149fa7..d780ac4 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -48,9 +48,9 @@ extractFactTable fact = do FactCountDistinct _ cName -> [ Column cName "json" NotNull ] _ -> [] - fkColumns = for allDims $ \(_, Table {..}) -> - let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName - colType = idColTypeToFKIdColType settingDimTableIdColumnType + fkColumns = for allDims $ \(dimFact, dimTable) -> + let colName = factDimFKIdColumnName settingDimPrefix dimIdColName dimFact dimTable tables + colType = idColTypeToFKIdColType settingDimTableIdColumnType in Column colName colType NotNull ukColNames = diff --git a/src/Ringo/Extractor/Internal.hs b/src/Ringo/Extractor/Internal.hs index 1b5c59c..2b966ce 100644 --- a/src/Ringo/Extractor/Internal.hs +++ b/src/Ringo/Extractor/Internal.hs @@ -38,9 +38,11 @@ timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName timeUnitColumnName dimIdColName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName -factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName -factDimFKIdColumnName dimPrefix dimIdColName dimTableName = - fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName +factDimFKIdColumnName :: Text -> Text -> Fact -> Table -> [Table] -> ColumnName +factDimFKIdColumnName dimPrefix dimIdColName dimFact dimTable@Table { .. } tables = + if dimTable `elem` tables + then head [ cName | DimId tName cName <- factColumns dimFact, tName == tableName ] + else fromMaybe tableName (Text.stripPrefix dimPrefix tableName) <> "_" <> dimIdColName extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName extractedFactTableName factPrefix factInfix factName timeUnit = diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 711ad86..5f0151c 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -70,6 +70,7 @@ factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact) factTableIndexStmts :: Fact -> Table -> Reader Env [Statement] factTableIndexStmts fact table = do Settings {..} <- asks envSettings + tables <- asks envTables allDims <- extractAllDimensionTables fact let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ] @@ -83,8 +84,8 @@ factTableIndexStmts fact table = do TenantId cName -> Just [cName] _ -> Nothing - dimCols = [ [factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName] - | (_, Table {..}) <- allDims ] + dimCols = [ [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName dimFact dimTable tables ] + | (dimFact, dimTable) <- allDims ] return [ CreateIndexTSQL ea (nmc "") (name $ tabName) (map nmc cols) | cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol] diff --git a/src/Ringo/Generator/Populate/Fact.hs b/src/Ringo/Generator/Populate/Fact.hs index a580e31..d4e7428 100644 --- a/src/Ringo/Generator/Populate/Fact.hs +++ b/src/Ringo/Generator/Populate/Fact.hs @@ -160,7 +160,8 @@ factTablePopulateStmts popMode fact = do _ -> [] dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let - dimFKIdColName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName + dimFKIdColName = + factDimFKIdColumnName settingDimPrefix dimIdColName dimFact factTable tables factSourceTableName = factTableName dimFact factSourceTable = fromJust . findTable factSourceTableName $ tables dimFKIdColumn = fromJust . findColumn dimFKIdColName $ tableColumns factSourceTable @@ -168,16 +169,16 @@ factTablePopulateStmts popMode fact = do [ binop "=" (eqi tableName dimColName) (coalesceColumn defaults factSourceTableName sourceCol) | (dimColName, sourceColName) <- dimColumnMapping settingDimPrefix dimFact tableName , let sourceCol = fromJust . findColumn sourceColName $ tableColumns factSourceTable ] - insertExpr = if factTable `elem` tables -- existing dimension table + insertExpr = if factTable `elem` tables -- existing dimension table then (if columnNullable dimFKIdColumn == Null then coalesceFKId else id) $ eqi factSourceTableName dimFKIdColName else coalesceFKId . subQueryExp $ - makeSelect - { selSelectList = sl [ si $ ei dimIdColName ] - , selTref = - [ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ] - , selWhere = dimLookupWhereClauses - } + makeSelect + { selSelectList = sl [ si $ ei dimIdColName ] + , selTref = + [ trefa (suffixTableName popMode settingTableNameSuffixTemplate tableName) tableName ] + , selWhere = dimLookupWhereClauses + } in (dimFKIdColName, insertExpr, True) colMap = [ (cName, (expr, nmc $ groupByColPrefix <> cName), addToGroupBy) From 8c3c4d801de9ede387dac57748a160458adcab7e Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Wed, 27 Jan 2016 01:09:07 +0530 Subject: [PATCH 09/10] Moves to stackage lts-5. --- .travis.yml | 1 + stack.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b949433..5e4ed2c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,7 @@ env: - ARGS="--resolver lts-2" - ARGS="--resolver lts-3" - ARGS="--resolver lts-4" +- ARGS="--resolver lts-5" - ARGS="--resolver lts" - ARGS="--resolver nightly" diff --git a/stack.yaml b/stack.yaml index c1b9d00..26532ef 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,7 +1,7 @@ # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) -resolver: lts-3.20 +resolver: lts-5.0 # Local packages, usually specified by relative directory name packages: From 0f4970d587840004ba50261d199df4db35197ab3 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Wed, 3 Feb 2016 16:00:39 +0530 Subject: [PATCH 10/10] Changes FactColumn to use GADTs for better type safety. --- app/Ringo/InputParser.hs | 24 ++-- ringo.cabal | 6 +- src/Ringo.hs | 29 +++-- src/Ringo/Extractor.hs | 53 +++++---- src/Ringo/Extractor/Internal.hs | 15 ++- src/Ringo/Generator/Create.hs | 20 ++-- src/Ringo/Generator/Internal.hs | 7 +- src/Ringo/Generator/Populate/Dimension.hs | 4 +- src/Ringo/Generator/Populate/Fact.hs | 139 ++++++++++++---------- src/Ringo/Types.hs | 85 ++++++++----- src/Ringo/Utils.hs | 1 - src/Ringo/Validator.hs | 26 ++-- stack.yaml | 2 +- 13 files changed, 235 insertions(+), 176 deletions(-) diff --git a/app/Ringo/InputParser.hs b/app/Ringo/InputParser.hs index 24bfe29..c555006 100644 --- a/app/Ringo/InputParser.hs +++ b/app/Ringo/InputParser.hs @@ -53,17 +53,17 @@ instance FromJSON FactColumn where parseJSON (Object o) = do cType <- o .: "type" case cType of - "dimtime" -> DimTime <$> o .: "column" - "nodimid" -> NoDimId <$> o .: "column" - "tenantid" -> TenantId <$> o .: "column" - "dimid" -> DimId <$> o .: "table" <*> o .: "column" - "dimval" -> DimVal <$> o .: "table" <*> o .: "column" - "factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column" - "factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column" - "factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column" - "factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column" - "factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column" - "factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column" + "dimtime" -> FactColumn <$> o .: "column" <*> pure DimTime + "nodimid" -> FactColumn <$> o .: "column" <*> pure NoDimId + "tenantid" -> FactColumn <$> o .: "column" <*> pure TenantId + "dimid" -> FactColumn <$> o .: "column" <*> (DimId <$> o .: "table") + "dimval" -> FactColumn <$> o .: "column" <*> (DimVal <$> o .: "table") + "factcount" -> FactColumn <$> o .: "column" <*> (FactCount <$> o .:? "sourcecolumn") + "factcountdistinct" -> FactColumn <$> o .: "column" <*> (FactCountDistinct <$> o .:? "sourcecolumn") + "factsum" -> FactColumn <$> o .: "column" <*> (FactSum <$> o .: "sourcecolumn") + "factaverage" -> FactColumn <$> o .: "column" <*> (FactAverage <$> o .: "sourcecolumn") + "factmax" -> FactColumn <$> o .: "column" <*> (FactMax <$> o .: "sourcecolumn") + "factmin" -> FactColumn <$> o .: "column" <*> (FactMin <$> o .: "sourcecolumn") _ -> fail $ "Invalid fact column type: " ++ cType parseJSON o = fail $ "Cannot parse fact column: " ++ show o @@ -75,7 +75,7 @@ instance FromJSON Fact where <*> o .: "columns" parseJSON o = fail $ "Cannot parse fact: " ++ show o -data Input = Input [Table] [Fact] TypeDefaults deriving (Eq, Show) +data Input = Input [Table] [Fact] TypeDefaults deriving (Show) instance FromJSON Input where parseJSON (Object o) = Input <$> o .: "tables" <*> o .: "facts" <*> o .: "defaults" diff --git a/ringo.cabal b/ringo.cabal index e205901..f98d330 100644 --- a/ringo.cabal +++ b/ringo.cabal @@ -33,7 +33,8 @@ library mtl >=2.1 && <2.3, raw-strings-qq >=1.0 && <1.2, hssqlppp ==0.5.23 - ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 + ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind + -fno-warn-orphans -funbox-strict-fields -O2 default-language: Haskell2010 executable ringo @@ -52,7 +53,8 @@ executable ringo filepath >=1.3 && <1.5, aeson >=0.8 && <0.11, ringo - ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2 + ghc-options: -Wall -Werror -fwarn-incomplete-uni-patterns -fno-warn-unused-do-bind + -fno-warn-orphans -funbox-strict-fields -O2 default-language: Haskell2010 test-suite ringo-test diff --git a/src/Ringo.hs b/src/Ringo.hs index efa4003..6e30c7b 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} module Ringo ( -- | The examples in this module assume the following code has been run. -- The :{ and :} will only work in GHCi. @@ -64,15 +63,15 @@ import qualified Ringo.Validator as V -- , factTablePersistent = True -- , factParentNames = [] -- , factColumns = --- [ DimTime "created_at" --- , NoDimId "publisher_id" --- , DimVal "user_agent" "browser_name" --- , DimVal "user_agent" "os" --- , DimVal "user_agent" "user_agent_name" --- , DimVal "geo" "geo_country_name" --- , DimVal "geo" "geo_city_name" --- , DimVal "geo" "geo_continent_name" --- , FactCount Nothing "session_count" +-- [ FactColumn "created_at" $ DimTime +-- , FactColumn "publisher_id" $ NoDimId +-- , FactColumn "browser_name" $ DimVal "user_agent" +-- , FactColumn "os" $ DimVal "user_agent" +-- , FactColumn "user_agent_name" $ DimVal "user_agent" +-- , FactColumn "geo_country_name" $ DimVal "geo" +-- , FactColumn "geo_city_name" $ DimVal "geo" +-- , FactColumn "geo_continent_name" $ DimVal "geo" +-- , FactColumn "session_count" $ FactCount Nothing -- ] -- } -- pageViewEventsTable = @@ -105,11 +104,11 @@ import qualified Ringo.Validator as V -- , factTablePersistent = True -- , factParentNames = [ "session" ] -- , factColumns = --- [ DimTime "created_at" --- , NoDimId "publisher_id" --- , DimVal "page_type" "page_type" --- , DimId "referrers" "referrer_id" --- , FactCount Nothing "view_count" +-- [ FactColumn "created_at" $ DimTime +-- , FactColumn "publisher_id" $ NoDimId +-- , FactColumn "page_type" $ DimVal "page_type" +-- , FactColumn "referrer_id" $ DimId "referrers" +-- , FactColumn "view_count" $ FactCount Nothing -- ] -- } -- referrersTable = diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index d780ac4..192acd8 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -1,5 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE GADTs #-} module Ringo.Extractor ( extractDimensionTables , extractAllDimensionTables @@ -32,21 +33,22 @@ extractFactTable fact = do notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull } notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName } - columns = concatFor (factColumns fact) $ \col -> case col of - DimTime cName -> - [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ] - NoDimId cName -> [ notNullSourceColumnCopy cName ] - TenantId cName -> [ notNullSourceColumnCopy cName ] - FactCount _ cName -> [ Column cName countColType NotNull ] - FactSum scName cName -> [ notNullSourceColumnRename scName cName ] - FactMax scName cName -> [ notNullSourceColumnRename scName cName ] - FactMin scName cName -> [ notNullSourceColumnRename scName cName ] - FactAverage scName cName -> - [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull - , notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix) - ] - FactCountDistinct _ cName -> [ Column cName "json" NotNull ] - _ -> [] + columns = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} -> + case factColType of + DimTime -> + [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ] + NoDimId -> [ notNullSourceColumnCopy cName ] + TenantId -> [ notNullSourceColumnCopy cName ] + FactCount {..} -> [ Column cName countColType NotNull ] + FactCountDistinct {..} -> [ Column cName "json" NotNull ] + FactSum {..} -> [ notNullSourceColumnRename factColSourceColumn cName ] + FactMax {..} -> [ notNullSourceColumnRename factColSourceColumn cName ] + FactMin {..} -> [ notNullSourceColumnRename factColSourceColumn cName ] + FactAverage {..} -> + [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull + , notNullSourceColumnRename factColSourceColumn (cName <> settingAvgSumColumnSuffix) + ] + _ -> [] fkColumns = for allDims $ \(dimFact, dimTable) -> let colName = factDimFKIdColumnName settingDimPrefix dimIdColName dimFact dimTable tables @@ -55,11 +57,12 @@ extractFactTable fact = do ukColNames = (++ map columnName fkColumns) - . forMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit) - NoDimId cName -> Just cName - TenantId cName -> Just cName - _ -> Nothing + . forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} -> + case factColType of + DimTime -> Just $ timeUnitColumnName dimIdColName cName settingTimeUnit + NoDimId -> Just cName + TenantId -> Just cName + _ -> Nothing return Table { tableName = @@ -77,15 +80,15 @@ extractDependencies fact = do (factTableName fct, parentFacts fct facts) factDimDeps = nub . concat . Tree.flatten . flip Tree.unfoldTree fact $ \fct -> - ( forMaybe (factColumns fct) $ \col -> case col of - DimVal table _ -> Just $ settingDimPrefix <> table - DimId table _ -> Just table - _ -> Nothing + ( forMaybe (factColumns fct) $ \FactColumn {..} -> case factColType of + DimVal {..} -> Just $ settingDimPrefix <> factColTargetTable + DimId {..} -> Just factColTargetTable + _ -> Nothing , parentFacts fct facts ) dimDeps = Map.fromList [ (settingDimPrefix <> table, [factTableName fact]) - | DimVal table _ <- factColumns fact ] + | FactColumn {factColType = DimVal table} <- factColumns fact ] factDeps = Map.singleton (extractedTable settings) (factSourceDeps ++ factDimDeps) return $ Map.union dimDeps factDeps diff --git a/src/Ringo/Extractor/Internal.hs b/src/Ringo/Extractor/Internal.hs index 2b966ce..90d9690 100644 --- a/src/Ringo/Extractor/Internal.hs +++ b/src/Ringo/Extractor/Internal.hs @@ -2,6 +2,7 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE CPP #-} +{-# LANGUAGE GADTs #-} module Ringo.Extractor.Internal where import qualified Data.Map as Map @@ -41,7 +42,9 @@ timeUnitColumnName dimIdColName colName timeUnit = factDimFKIdColumnName :: Text -> Text -> Fact -> Table -> [Table] -> ColumnName factDimFKIdColumnName dimPrefix dimIdColName dimFact dimTable@Table { .. } tables = if dimTable `elem` tables - then head [ cName | DimId tName cName <- factColumns dimFact, tName == tableName ] + then head [ factColTargetColumn + | FactColumn {factColType = DimId {..}, ..} <- factColumns dimFact + , factColTargetTable == tableName ] else fromMaybe tableName (Text.stripPrefix dimPrefix tableName) <> "_" <> dimIdColName extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName @@ -62,7 +65,9 @@ extractDimensionTables fact = do let table = fromJust . findTable (factTableName fact) $ tables return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table) where - dimsFromIds tables = catMaybes [ findTable d tables | DimId d _ <- factColumns fact ] + dimsFromIds tables = + catMaybes [ findTable factColTargetTable tables + | FactColumn {factColType = DimId {..}} <- factColumns fact ] dimsFromVals Settings {..} tableColumns = map (\(dim, cols) -> @@ -81,9 +86,9 @@ extractDimensionTables fact = do . nub) . Map.fromListWith (flip (++)) . mapMaybe (\fcol -> do - DimVal d col <- fcol - column <- findColumn col tableColumns - return (d, [ column ])) + FactColumn {factColType = DimVal {..}, ..} <- fcol + column <- findColumn factColTargetColumn tableColumns + return (factColTargetTable, [ column ])) . map Just . factColumns $ fact diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 5f0151c..2cc40a1 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -1,6 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternSynonyms #-} + module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where #if MIN_VERSION_base(4,8,0) @@ -73,20 +76,21 @@ factTableIndexStmts fact table = do tables <- asks envTables allDims <- extractAllDimensionTables fact - let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ] - tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ] + let dimTimeCol = head [ cName | DimTimeV cName <- factColumns fact ] + tenantIdCol = listToMaybe [ cName | TenantIdV cName <- factColumns fact ] tabName = tableName table <> settingTableNameSuffixTemplate dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit - factCols = forMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just [dimTimeColName cName] - NoDimId cName -> Just [cName] - TenantId cName -> Just [cName] - _ -> Nothing + factCols = forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} -> + case factColType of + DimTime -> Just [dimTimeColName cName] + NoDimId -> Just [cName] + TenantId -> Just [cName] + _ -> Nothing dimCols = [ [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName dimFact dimTable tables ] | (dimFact, dimTable) <- allDims ] - return [ CreateIndexTSQL ea (nmc "") (name $ tabName) (map nmc cols) + return [ CreateIndexTSQL ea (nmc "") (name tabName) (map nmc cols) | cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol] | cName <- maybeToList tenantIdCol ] ] diff --git a/src/Ringo/Generator/Internal.hs b/src/Ringo/Generator/Internal.hs index 245b940..f1fdbcf 100644 --- a/src/Ringo/Generator/Internal.hs +++ b/src/Ringo/Generator/Internal.hs @@ -1,5 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE GADTs #-} module Ringo.Generator.Internal where import qualified Data.Map as Map @@ -16,9 +17,9 @@ import Ringo.Types dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)] dimColumnMapping dimPrefix fact dimTableName = - [ (dimColumnName dName cName, cName) - | DimVal dName cName <- factColumns fact - , dimPrefix <> dName == dimTableName ] + [ (dimColumnName factColTargetTable factColTargetColumn, factColTargetColumn) + | FactColumn { factColType = DimVal {..}, ..} <- factColumns fact + , dimPrefix <> factColTargetTable == dimTableName ] coalesceColumn :: TypeDefaults -> TableName -> Column -> ScalarExpr coalesceColumn defaults tName Column{..} = diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index f125fc9..8ba028a 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -1,6 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} +{-# LANGUAGE PatternSynonyms #-} + module Ringo.Generator.Populate.Dimension (dimensionTablePopulateSQL) where #if MIN_VERSION_base(4,8,0) @@ -32,7 +34,7 @@ dimensionTablePopulateStmt popMode fact dimTableName = do selectCols = [ flip sia (nmc cName) $ coalesceColumn defaults (factTableName fact) col | (_, cName) <- colMapping , let col = fromJust . findColumn cName $ tableColumns factTable ] - timeCol = head [ cName | DimTime cName <- factColumns fact ] + timeCol = head ([ cName | DimTimeV cName <- factColumns fact ] :: [ColumnName]) isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping selectWhereC = Just . foldBinop "and" $ [ isNotNullC, binop "<" (ei timeCol) placeholder ] ++ diff --git a/src/Ringo/Generator/Populate/Fact.hs b/src/Ringo/Generator/Populate/Fact.hs index d4e7428..494bbc4 100644 --- a/src/Ringo/Generator/Populate/Fact.hs +++ b/src/Ringo/Generator/Populate/Fact.hs @@ -4,6 +4,9 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternSynonyms #-} + module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where import qualified Data.Text as Text @@ -55,61 +58,66 @@ LANGUAGE 'plpgsql' IMMUTABLE; |] factCountDistinctUpdateStmts :: TablePopulationMode -> Fact -> Text -> QueryExpr -> Reader Env [Statement] -factCountDistinctUpdateStmts - popMode fact groupByColPrefix ~Select {selSelectList = SelectList _ origSelectItems, ..} = do - Settings {..} <- asks envSettings - tables <- asks envTables - let countDistinctCols = [ col | col@(FactCountDistinct _ _) <- factColumns fact] - fTableName = factTableName fact - fTable = fromJust . findTable fTableName $ tables - tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ] - extFactTableName = - suffixTableName popMode settingTableNameSuffixTemplate - $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit +factCountDistinctUpdateStmts popMode fact groupByColPrefix expr = case expr of + Select {selSelectList = SelectList _ origSelectItems, ..} -> do + Settings {..} <- asks envSettings + tables <- asks envTables + let fTableName = factTableName fact + fTable = fromJust . findTable fTableName $ tables + tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints fTable ] + extFactTableName = + suffixTableName popMode settingTableNameSuffixTemplate + $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit - return $ for countDistinctCols $ \(FactCountDistinct scName cName) -> - let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text" + return $ forMaybe (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} -> + case factColType of + FactCountDistinct {factColMaybeSourceColumn = scName} -> + let unqCol = cast (eqi fTableName (fromMaybe tablePKColName scName)) "text" - bucketSelectCols = - [ sia (binop "&" (app "hashtext" [ unqCol ]) - (num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1)) - (nmc $ cName <> "_bnum") - , sia (binop "-" - (num "31") - (app "ilog2" - [ app "min" [ binop "&" - (app "hashtext" [ unqCol ]) - (prefop "~" (parens (binop "<<" (num "1") (num "31"))))]])) - (nmc $ cName <> "_bhash") - ] + bucketSelectCols = + [ sia (binop "&" (app "hashtext" [ unqCol ]) + (num . Text.pack . show $ bucketCount settingFactCountDistinctErrorRate - 1)) + (nmc $ cName <> "_bnum") + , sia (binop "-" + (num "31") + (app "ilog2" + [ app "min" [ binop "&" + (app "hashtext" [ unqCol ]) + (prefop "~" (parens (binop "<<" (num "1") (num "31"))))]])) + (nmc $ cName <> "_bhash") + ] - groupByCols = map ppScalarExpr selGroupBy - selectList = - [ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ] + groupByCols = map ppScalarExpr selGroupBy + selectList = + [ i | i@(SelectItem _ _ a) <- origSelectItems , a `elem` map nmc groupByCols ] - selectStmt = - makeSelect - { selSelectList = sl $ selectList ++ bucketSelectCols - , selTref = selTref - , selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere - , selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ] - } + selectStmt = + makeSelect + { selSelectList = sl $ selectList ++ bucketSelectCols + , selTref = selTref + , selWhere = binop "and" (postop "isnotnull" unqCol) <$> selWhere + , selGroupBy = selGroupBy ++ [ ei $ cName <> "_bnum" ] + } - aggSelectClause = - sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName) + aggSelectClause = + sia (app "json_object_agg" [ ei (cName <> "_bnum"), ei (cName <> "_bhash") ]) (nmc cName) - in update extFactTableName - [ (cName, eqi "xyz" cName) ] - [ subtrefa "xyz" - makeSelect - { selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ] - , selTref = [ subtrefa "zyx" selectStmt ] - , selGroupBy = selGroupBy - } ] $ - foldBinop "and" - [ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col) - (eqi "xyz" col) - | col <- groupByCols ] + in Just $ update extFactTableName + [ (cName, eqi "xyz" cName) ] + [ subtrefa "xyz" + makeSelect + { selSelectList = sl $ map (si . ei) groupByCols ++ [ aggSelectClause ] + , selTref = [ subtrefa "zyx" selectStmt ] + , selGroupBy = selGroupBy + } ] $ + foldBinop "and" + [ binop "=" (eqi extFactTableName . fromJust . Text.stripPrefix groupByColPrefix $ col) + (eqi "xyz" col) + | col <- groupByCols ] + + _ -> Nothing + + _ -> return [] where bucketCount :: Double -> Integer bucketCount errorRate = @@ -143,21 +151,22 @@ factTablePopulateStmts popMode fact = do app' f cName = app f [ eqi fTableName cName ] - factColMap = concatFor (factColumns fact) $ \col -> case col of - DimTime cName -> [ timeUnitColumnInsertSQL cName ] - NoDimId cName -> [ dimIdColumnInsertSQL cName ] - TenantId cName -> [ dimIdColumnInsertSQL cName ] - FactCount scName cName -> - [ (cName, app "count" [ maybe star (eqi fTableName) scName ], False) ] - FactSum scName cName -> [ (cName, app' "sum" scName, False) ] - FactMax scName cName -> [ (cName, app' "max" scName, False) ] - FactMin scName cName -> [ (cName, app' "min" scName, False) ] - FactAverage scName cName -> - [ ( cName <> settingAvgCountColumSuffix, app' "count" scName, False ) - , ( cName <> settingAvgSumColumnSuffix , app' "sum" scName , False) - ] - FactCountDistinct _ cName -> [ (cName, cast (str "{}") "json", False) ] - _ -> [] + factColMap = concatFor (factColumns fact) $ \FactColumn {factColTargetColumn = cName, ..} -> + case factColType of + DimTime -> [ timeUnitColumnInsertSQL cName ] + NoDimId -> [ dimIdColumnInsertSQL cName ] + TenantId -> [ dimIdColumnInsertSQL cName ] + FactCount {..} -> + [ (cName, app "count" [ maybe star (eqi fTableName) factColMaybeSourceColumn ], False) ] + FactCountDistinct {..} -> [ (cName, cast (str "{}") "json", False) ] + FactSum {..} -> [ (cName, app' "sum" factColSourceColumn, False) ] + FactMax {..} -> [ (cName, app' "max" factColSourceColumn, False) ] + FactMin {..} -> [ (cName, app' "min" factColSourceColumn, False) ] + FactAverage {..} -> + [ ( cName <> settingAvgCountColumSuffix, app' "count" factColSourceColumn, False ) + , ( cName <> settingAvgSumColumnSuffix , app' "sum" factColSourceColumn , False) + ] + _ -> [] dimColMap = for allDims $ \(dimFact, factTable@Table {tableName}) -> let dimFKIdColName = @@ -191,7 +200,7 @@ factTablePopulateStmts popMode fact = do . map (factTableName . fst) $ allDims - timeCol = eqi fTableName $ head [ cName | DimTime cName <- factColumns fact ] + timeCol = eqi fTableName $ head [ cName | DimTimeV cName <- factColumns fact ] extFactTableName = suffixTableName popMode settingTableNameSuffixTemplate $ extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index af99067..24138d9 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -1,6 +1,10 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE PatternSynonyms #-} + module Ringo.Types where import qualified Data.Text as Text @@ -44,7 +48,6 @@ instance Show TableConstraint where show (UniqueKey cols) = "UniqueKey " ++ showColNames cols show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " " ++ Text.unpack tName ++ " " ++ showColNames (map snd colMap) - data Table = Table { tableName :: !TableName , tableColumns :: ![Column] @@ -53,7 +56,7 @@ data Table = Table instance Show Table where show Table {..} = - unlines $ ("Table " ++ Text.unpack tableName) : (map show tableColumns) ++ (map show tableConstraints) + unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints data TimeUnit = Second | Minute | Hour | Day | Week deriving (Eq, Enum, Show, Read) @@ -74,33 +77,59 @@ data Fact = Fact , factTablePersistent :: !Bool , factParentNames :: ![TableName] , factColumns :: ![FactColumn] - } deriving (Eq, Show) + } deriving (Show) -data FactColumn = DimTime !ColumnName - | NoDimId !ColumnName - | TenantId !ColumnName - | DimId !TableName !ColumnName - | DimVal !TableName !ColumnName - | FactCount !(Maybe ColumnName) !ColumnName - | FactSum !ColumnName !ColumnName - | FactAverage !ColumnName !ColumnName - | FactCountDistinct !(Maybe ColumnName) !ColumnName - | FactMax !ColumnName !ColumnName - | FactMin !ColumnName !ColumnName - deriving (Eq, Show) +data FCTNone +data FCTTargetTable +data FCTMaybeSourceColumn +data FCTSourceColumn + +data FactColumnType a where + DimTime :: FactColumnType FCTNone + NoDimId :: FactColumnType FCTNone + TenantId :: FactColumnType FCTNone + DimId :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable + DimVal :: { factColTargetTable :: !TableName } -> FactColumnType FCTTargetTable + FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn + FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType FCTMaybeSourceColumn + FactSum :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn + FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn + FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn + FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType FCTSourceColumn + +deriving instance Show (FactColumnType a) + +pattern DimTimeV col <- FactColumn col DimTime +pattern NoDimIdV col <- FactColumn col NoDimId +pattern TenantIdV col <- FactColumn col TenantId +pattern DimIdV col <- FactColumn col DimId {..} +pattern DimValV col <- FactColumn col DimVal {..} +pattern FactCountV col <- FactColumn col FactCount {..} +pattern FactCountDistinctV col <- FactColumn col FactCountDistinct {..} +pattern FactSumV col <- FactColumn col FactSum {..} +pattern FactAverageV col <- FactColumn col FactAverage {..} +pattern FactMaxV col <- FactColumn col FactMax {..} +pattern FactMinV col <- FactColumn col FactMin {..} + +data FactColumn = forall a. FactColumn + { factColTargetColumn :: !ColumnName + , factColType :: FactColumnType a } + +deriving instance Show FactColumn factSourceColumnName :: FactColumn -> Maybe ColumnName -factSourceColumnName (DimTime cName) = Just cName -factSourceColumnName (NoDimId cName) = Just cName -factSourceColumnName (TenantId cName) = Just cName -factSourceColumnName (DimId _ cName) = Just cName -factSourceColumnName (DimVal _ cName) = Just cName -factSourceColumnName (FactCount cName _) = cName -factSourceColumnName (FactSum cName _) = Just cName -factSourceColumnName (FactAverage cName _) = Just cName -factSourceColumnName (FactCountDistinct cName _) = cName -factSourceColumnName (FactMax cName _) = Just cName -factSourceColumnName (FactMin cName _) = Just cName +factSourceColumnName FactColumn {..} = case factColType of + DimTime -> Just factColTargetColumn + NoDimId -> Just factColTargetColumn + TenantId -> Just factColTargetColumn + DimId {..} -> Just factColTargetColumn + DimVal {..} -> Just factColTargetColumn + FactCount {..} -> factColMaybeSourceColumn + FactCountDistinct {..} -> factColMaybeSourceColumn + FactSum {..} -> Just factColSourceColumn + FactAverage {..} -> Just factColSourceColumn + FactMax {..} -> Just factColSourceColumn + FactMin {..} -> Just factColSourceColumn data Settings = Settings { settingDimPrefix :: !Text @@ -154,7 +183,7 @@ data Env = Env , envFacts :: ![Fact] , envSettings :: !Settings , envTypeDefaults :: !TypeDefaults - } deriving (Eq, Show) + } deriving (Show) data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show) diff --git a/src/Ringo/Utils.hs b/src/Ringo/Utils.hs index cc94de9..e27980d 100644 --- a/src/Ringo/Utils.hs +++ b/src/Ringo/Utils.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} module Ringo.Utils where diff --git a/src/Ringo/Validator.hs b/src/Ringo/Validator.hs index 1e75bb4..c22e083 100644 --- a/src/Ringo/Validator.hs +++ b/src/Ringo/Validator.hs @@ -1,6 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternSynonyms #-} + module Ringo.Validator ( validateTable , validateFact @@ -51,17 +54,19 @@ validateFact Fact {..} = do parentVs <- concat <$> mapM checkFactParents factParentNames let colVs = concatMap (checkColumn tables table) factColumns timeVs = [ MissingTimeColumn factTableName - | null [ c | DimTime c <- factColumns ] ] - notNullVs = [ MissingNotNullConstraint factTableName c - | DimTime c <- factColumns - , let col = findColumn c (tableColumns table) + | null ([ cName | DimTimeV cName <- factColumns ] :: [ColumnName]) ] + notNullVs = [ MissingNotNullConstraint factTableName cName + | DimTimeV cName <- factColumns + , let col = findColumn cName (tableColumns table) , isJust col , columnNullable (fromJust col) == Null ] + typeDefaultVs = [ MissingTypeDefault cType - | cName <- [ c | DimVal _ c <- factColumns ] - ++ [ c | NoDimId c <- factColumns ] - ++ [ c | TenantId c <- factColumns ] + | cName <- [ c | DimValV c <- factColumns ] + ++ [ c | NoDimIdV c <- factColumns ] + ++ [ c | TenantIdV c <- factColumns ] + ++ [ c | DimIdV c <- factColumns ] , let col = findColumn cName (tableColumns table) , isJust col , let cType = columnType $ fromJust col @@ -79,6 +84,7 @@ validateFact Fact {..} = do maybe [] (checkTableForCol table) (factSourceColumnName factCol) ++ checkColumnTable tables factCol - checkColumnTable tables factCol = case factCol of - DimId tName _ -> maybe [ MissingTable tName ] (const []) $ findTable tName tables - _ -> [] + checkColumnTable :: [Table] -> FactColumn -> [ValidationError] + checkColumnTable tables FactColumn {..} = case factColType of + DimId {factColTargetTable = tName} -> maybe [ MissingTable tName ] (const []) $ findTable tName tables + _ -> [] diff --git a/stack.yaml b/stack.yaml index 26532ef..f1930e6 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,7 +1,7 @@ # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) -resolver: lts-5.0 +resolver: lts-5.1 # Local packages, usually specified by relative directory name packages: