diff --git a/.travis.yml b/.travis.yml index 26f6fb0..b949433 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ env: - ARGS="" - ARGS="--resolver lts-2" - ARGS="--resolver lts-3" +- ARGS="--resolver lts-4" - ARGS="--resolver lts" - ARGS="--resolver nightly" diff --git a/app/Main.hs b/app/Main.hs index 2fa0119..f36329e 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -54,7 +54,7 @@ writeFiles outputDir env@Env{..} = do where dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ] - factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ] + factTables = [ (fact, extractFactTable env fact) | fact <- envFacts, factTablePersistent fact ] dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table) | (_, tabs) <- dimTables diff --git a/app/Ringo/InputParser.hs b/app/Ringo/InputParser.hs index 6182bfe..24bfe29 100644 --- a/app/Ringo/InputParser.hs +++ b/app/Ringo/InputParser.hs @@ -55,18 +55,22 @@ instance FromJSON FactColumn where case cType of "dimtime" -> DimTime <$> o .: "column" "nodimid" -> NoDimId <$> o .: "column" + "tenantid" -> TenantId <$> o .: "column" "dimid" -> DimId <$> o .: "table" <*> o .: "column" "dimval" -> DimVal <$> o .: "table" <*> o .: "column" "factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column" "factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column" "factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column" "factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column" + "factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column" + "factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column" _ -> fail $ "Invalid fact column type: " ++ cType parseJSON o = fail $ "Cannot parse fact column: " ++ show o instance FromJSON Fact where parseJSON (Object o) = Fact <$> o .: "name" <*> o .: "tablename" + <*> o .:? "persistent" .!= True <*> o .:? "parentfacts" .!= [] <*> o .: "columns" parseJSON o = fail $ "Cannot parse fact: " ++ show o diff --git a/src/Ringo.hs b/src/Ringo.hs index eb41d13..2035c53 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -33,8 +33,8 @@ import qualified Ringo.Validator as V -- >>> import Text.Show.Pretty -- >>> :{ --let sessionEventsTable = --- Table { tableName = "session_events" --- , tableColumns = +-- Table { tableName = "session_events" +-- , tableColumns = -- [ Column "id" "uuid" NotNull -- , Column "created_at" "timestamp without time zone" Null -- , Column "member_id" "integer" Null @@ -55,14 +55,14 @@ import qualified Ringo.Validator as V -- , Column "user_agent_version" "character varying(100)" Null -- , Column "user_agent_device" "character varying(15)" Null -- ] --- , tableConstraints = --- [ PrimaryKey "id" ] +-- , tableConstraints = [ PrimaryKey "id" ] -- } -- sessionFact = --- Fact { factName = "session" --- , factTableName = "session_events" --- , factParentNames = [] --- , factColumns = +-- Fact { factName = "session" +-- , factTableName = "session_events" +-- , factTablePersistent = True +-- , factParentNames = [] +-- , factColumns = -- [ DimTime "created_at" -- , NoDimId "publisher_id" -- , DimVal "user_agent" "browser_name" @@ -78,8 +78,8 @@ import qualified Ringo.Validator as V -- , FactCount Nothing "session_count" -- ] -- } --- tables = [sessionEventsTable] --- facts = [sessionFact] +-- tables = [sessionEventsTable] +-- facts = [sessionFact] -- typeDefaults = Map.fromList [ ("integer", "-1") -- , ("timestamp", "'00-00-00 00:00:00'") -- , ("character", "'__UNKNOWN_VAL__'") @@ -89,8 +89,8 @@ import qualified Ringo.Validator as V -- , ("numeric", "-1") -- , ("text", "'__UNKNOWN_VAL__'") -- ] --- settings = defSettings { settingTableNameSuffixTemplate = "" } --- env = Env tables facts settings typeDefaults +-- settings = defSettings { settingTableNameSuffixTemplate = "" } +-- env = Env tables facts settings typeDefaults -- :} -- | @@ -169,6 +169,16 @@ extractDependencies env = flip runReader env . E.extractDependencies -- most_specific_subdivision_name, -- time_zone); -- +-- create index on dim_geo (country_name) +-- ; +-- create index on dim_geo (city_name) +-- ; +-- create index on dim_geo (continent_name) +-- ; +-- create index on dim_geo (most_specific_subdivision_name) +-- ; +-- create index on dim_geo (time_zone) +-- ; -- -------- -- create table dim_user_agent ( -- id serial not null, @@ -188,9 +198,19 @@ extractDependencies env = flip runReader env . E.extractDependencies -- type, -- device); -- +-- create index on dim_user_agent (browser_name) +-- ; +-- create index on dim_user_agent (os) +-- ; +-- create index on dim_user_agent (name) +-- ; +-- create index on dim_user_agent (type) +-- ; +-- create index on dim_user_agent (device) +-- ; -- -------- dimensionTableDefnSQL :: Env -> Table -> [Text] -dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL +dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL -- | -- @@ -243,7 +263,7 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- where -- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) -- and --- created_at <= ? +-- created_at < ? -- ; -- -- insert into dim_user_agent (browser_name, os, name, type, device) @@ -258,7 +278,7 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- where -- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) -- and --- created_at <= ? +-- created_at < ? -- ; -- -- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames @@ -282,9 +302,9 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- where -- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null) -- and --- created_at <= ? +-- created_at < ? -- and --- created_at > ?) as x +-- created_at >= ?) as x -- left outer join -- dim_geo -- on dim_geo.country_name = x.geo_country_name @@ -321,9 +341,9 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact -- where -- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null) -- and --- created_at <= ? +-- created_at < ? -- and --- created_at > ?) as x +-- created_at >= ?) as x -- left outer join -- dim_user_agent -- on dim_user_agent.browser_name = x.browser_name diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index 2abcfd4..6149fa7 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -26,21 +26,24 @@ extractFactTable fact = do tables <- asks envTables let table = fromJust . findTable (factTableName fact) $ tables - let countColType = settingFactCountColumnType - dimIdColName = settingDimTableIdColumnName - sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table + let countColType = settingFactCountColumnType + dimIdColName = settingDimTableIdColumnName + sourceColumn cName = fromJust . findColumn cName . tableColumns $ table + notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull } + notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName } columns = concatFor (factColumns fact) $ \col -> case col of DimTime cName -> [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ] - NoDimId cName -> let - col' = fromJust . findColumn cName . tableColumns $ table - in [ col' { columnNullable = NotNull } ] + NoDimId cName -> [ notNullSourceColumnCopy cName ] + TenantId cName -> [ notNullSourceColumnCopy cName ] FactCount _ cName -> [ Column cName countColType NotNull ] - FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] + FactSum scName cName -> [ notNullSourceColumnRename scName cName ] + FactMax scName cName -> [ notNullSourceColumnRename scName cName ] + FactMin scName cName -> [ notNullSourceColumnRename scName cName ] FactAverage scName cName -> [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull - , Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull + , notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix) ] FactCountDistinct _ cName -> [ Column cName "json" NotNull ] _ -> [] @@ -53,9 +56,10 @@ extractFactTable fact = do ukColNames = (++ map columnName fkColumns) . forMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit) - NoDimId cName -> Just cName - _ -> Nothing + DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit) + NoDimId cName -> Just cName + TenantId cName -> Just cName + _ -> Nothing return Table { tableName = diff --git a/src/Ringo/Generator.hs b/src/Ringo/Generator.hs index 09391a3..87bed07 100644 --- a/src/Ringo/Generator.hs +++ b/src/Ringo/Generator.hs @@ -1,5 +1,5 @@ module Ringo.Generator - ( tableDefnSQL + ( dimensionTableDefnSQL , factTableDefnSQL , dimensionTablePopulateSQL , factTablePopulateSQL diff --git a/src/Ringo/Generator/Create.hs b/src/Ringo/Generator/Create.hs index 304b02b..0c0f460 100644 --- a/src/Ringo/Generator/Create.hs +++ b/src/Ringo/Generator/Create.hs @@ -1,17 +1,17 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} -module Ringo.Generator.Create (tableDefnSQL, factTableDefnSQL) where +module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where #if MIN_VERSION_base(4,8,0) #else -import Control.Applicative ((<$>)) +import Control.Applicative ((<$>)) #endif import Control.Monad.Reader (Reader, asks) import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..) - , AlterTableOperation(..), Constraint(..), Cascade(..) - ) + , AlterTableOperation(..), Constraint(..), Cascade(..) ) +import Data.Maybe (listToMaybe, maybeToList) import Data.Monoid ((<>)) import Data.Text (Text) @@ -20,13 +20,10 @@ import Ringo.Generator.Sql import Ringo.Types import Ringo.Utils -tableDefnSQL :: Table -> Reader Env [Text] -tableDefnSQL table = map ppSQL <$> tableDefnSQL' table - -tableDefnSQL' :: Table -> Reader Env [Statement] -tableDefnSQL' Table {..} = do +tableDefnStmts :: Table -> Reader Env [Statement] +tableDefnStmts Table {..} = do Settings {..} <- asks envSettings - let tabName = tableName <> settingTableNameSuffixTemplate + let tabName = tableName <> settingTableNameSuffixTemplate tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing @@ -48,25 +45,47 @@ tableDefnSQL' Table {..} = do return $ tableSQL : map constraintDefnSQL tableConstraints -factTableDefnSQL :: Fact -> Table -> Reader Env [Text] -factTableDefnSQL fact table = do - ds <- map ppSQL <$> tableDefnSQL' table - is <- map (\st -> ppSQL st <> ";\n") <$> factTableIndexSQL' fact table +tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text] +tableDefnSQL table indexFn = do + ds <- map ppSQL <$> tableDefnStmts table + is <- map (\st -> ppSQL st <> ";\n") <$> indexFn table return $ ds ++ is -factTableIndexSQL' :: Fact -> Table -> Reader Env [Statement] -factTableIndexSQL' fact table = do +dimensionTableDefnSQL :: Table -> Reader Env [Text] +dimensionTableDefnSQL table = tableDefnSQL table dimensionTableIndexStmts + +dimensionTableIndexStmts :: Table -> Reader Env [Statement] +dimensionTableIndexStmts Table {..} = do + Settings {..} <- asks envSettings + let tabName = tableName <> settingTableNameSuffixTemplate + tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ] + nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ] + + return [ CreateIndexTSQL ea (nmc "") (name tabName) [nmc cName] + | cName <- nonPKColNames, length nonPKColNames > 1 ] + +factTableDefnSQL :: Fact -> Table -> Reader Env [Text] +factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact) + +factTableIndexStmts :: Fact -> Table -> Reader Env [Statement] +factTableIndexStmts fact table = do Settings {..} <- asks envSettings allDims <- extractAllDimensionTables fact - let factCols = forMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit - NoDimId cName -> Just cName - _ -> Nothing + let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ] + tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ] + tabName = tableName table <> settingTableNameSuffixTemplate + dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit - dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName - | (_, Table {..}) <- allDims ] + factCols = forMaybe (factColumns fact) $ \col -> case col of + DimTime cName -> Just [dimTimeColName cName] + NoDimId cName -> Just [cName] + TenantId cName -> Just [cName] + _ -> Nothing - return [ CreateIndexTSQL ea (nmc "") (name $ tableName table <> settingTableNameSuffixTemplate) [nmc col] - | col <- factCols ++ dimCols ] + dimCols = [ [factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName] + | (_, Table {..}) <- allDims ] + return [ CreateIndexTSQL ea (nmc "") (name $ tabName) (map nmc cols) + | cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol] + | cName <- maybeToList tenantIdCol ] ] diff --git a/src/Ringo/Generator/Populate/Dimension.hs b/src/Ringo/Generator/Populate/Dimension.hs index abed632..08d5714 100644 --- a/src/Ringo/Generator/Populate/Dimension.hs +++ b/src/Ringo/Generator/Populate/Dimension.hs @@ -35,8 +35,8 @@ dimensionTablePopulateSQL' popMode fact dimTableName = do timeCol = head [ cName | DimTime cName <- factColumns fact ] isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping selectWhereC = Just . foldBinop "and" $ - [ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++ - [ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ] + [ isNotNullC, binop "<" (ei timeCol) placeholder ] ++ + [ binop ">=" (ei timeCol) placeholder | popMode == IncrementalPopulation ] selectC = makeSelect { selDistinct = Distinct , selSelectList = sl selectCols diff --git a/src/Ringo/Generator/Populate/Fact.hs b/src/Ringo/Generator/Populate/Fact.hs index 75d4c7a..5d8d3f6 100644 --- a/src/Ringo/Generator/Populate/Fact.hs +++ b/src/Ringo/Generator/Populate/Fact.hs @@ -134,16 +134,22 @@ factTablePopulateSQL popMode fact = do <> Text.pack (show $ timeUnitToSeconds settingTimeUnit) , True ) + dimIdColumnInsertSQL cName = + let sCol = fromJust . findColumn cName $ tableColumns fTable + in (cName, coalesceColumn defaults fTableName sCol, True) factColMap = concatFor (factColumns fact) $ \col -> case col of DimTime cName -> [ timeUnitColumnInsertSQL cName ] - NoDimId cName -> - let sCol = fromJust . findColumn cName $ tableColumns fTable - in [ (cName, coalesceColumn defaults fTableName sCol, True) ] + NoDimId cName -> [ dimIdColumnInsertSQL cName ] + TenantId cName -> [ dimIdColumnInsertSQL cName ] FactCount scName cName -> [ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ] FactSum scName cName -> [ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ] + FactMax scName cName -> + [ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ] + FactMin scName cName -> + [ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ] FactAverage scName cName -> [ ( cName <> settingAvgCountColumSuffix , "count(" <> fullColumnName fTableName scName <> ")" @@ -194,7 +200,7 @@ factTablePopulateSQL popMode fact = do , ftpsSelectTable = fTableName , ftpsJoinClauses = joinClauses , ftpsWhereClauses = - timeCol <> " <= ?" : [ timeCol <> " > ?" | popMode == IncrementalPopulation ] + timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ] , ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap } diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index 77244d4..af99067 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -69,31 +69,38 @@ timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour timeUnitToSeconds Week = 7 * timeUnitToSeconds Day data Fact = Fact - { factName :: !TableName - , factTableName :: !TableName - , factParentNames :: ![TableName] - , factColumns :: ![FactColumn] + { factName :: !TableName + , factTableName :: !TableName + , factTablePersistent :: !Bool + , factParentNames :: ![TableName] + , factColumns :: ![FactColumn] } deriving (Eq, Show) data FactColumn = DimTime !ColumnName | NoDimId !ColumnName + | TenantId !ColumnName | DimId !TableName !ColumnName | DimVal !TableName !ColumnName | FactCount !(Maybe ColumnName) !ColumnName | FactSum !ColumnName !ColumnName | FactAverage !ColumnName !ColumnName | FactCountDistinct !(Maybe ColumnName) !ColumnName + | FactMax !ColumnName !ColumnName + | FactMin !ColumnName !ColumnName deriving (Eq, Show) factSourceColumnName :: FactColumn -> Maybe ColumnName factSourceColumnName (DimTime cName) = Just cName factSourceColumnName (NoDimId cName) = Just cName +factSourceColumnName (TenantId cName) = Just cName factSourceColumnName (DimId _ cName) = Just cName factSourceColumnName (DimVal _ cName) = Just cName factSourceColumnName (FactCount cName _) = cName factSourceColumnName (FactSum cName _) = Just cName factSourceColumnName (FactAverage cName _) = Just cName factSourceColumnName (FactCountDistinct cName _) = cName +factSourceColumnName (FactMax cName _) = Just cName +factSourceColumnName (FactMin cName _) = Just cName data Settings = Settings { settingDimPrefix :: !Text diff --git a/src/Ringo/Validator.hs b/src/Ringo/Validator.hs index e3c18e5..1e75bb4 100644 --- a/src/Ringo/Validator.hs +++ b/src/Ringo/Validator.hs @@ -59,7 +59,9 @@ validateFact Fact {..} = do , columnNullable (fromJust col) == Null ] typeDefaultVs = [ MissingTypeDefault cType - | cName <- [ c | DimVal _ c <- factColumns ] ++ [ c | NoDimId c <- factColumns ] + | cName <- [ c | DimVal _ c <- factColumns ] + ++ [ c | NoDimId c <- factColumns ] + ++ [ c | TenantId c <- factColumns ] , let col = findColumn cName (tableColumns table) , isJust col , let cType = columnType $ fromJust col