Merge branch 'master' into hssqlppp

pull/1/head
Abhinav Sarkar 7 years ago
commit 6d86392946
  1. 1
      .travis.yml
  2. 2
      app/Main.hs
  3. 4
      app/Ringo/InputParser.hs
  4. 58
      src/Ringo.hs
  5. 26
      src/Ringo/Extractor.hs
  6. 2
      src/Ringo/Generator.hs
  7. 67
      src/Ringo/Generator/Create.hs
  8. 4
      src/Ringo/Generator/Populate/Dimension.hs
  9. 14
      src/Ringo/Generator/Populate/Fact.hs
  10. 15
      src/Ringo/Types.hs
  11. 4
      src/Ringo/Validator.hs

@ -16,6 +16,7 @@ env:
- ARGS=""
- ARGS="--resolver lts-2"
- ARGS="--resolver lts-3"
- ARGS="--resolver lts-4"
- ARGS="--resolver lts"
- ARGS="--resolver nightly"

@ -54,7 +54,7 @@ writeFiles outputDir env@Env{..} = do
where
dimTables = [ (fact, extractDimensionTables env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts ]
factTables = [ (fact, extractFactTable env fact) | fact <- envFacts, factTablePersistent fact ]
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr $ dimensionTableDefnSQL env table)
| (_, tabs) <- dimTables

@ -55,18 +55,22 @@ instance FromJSON FactColumn where
case cType of
"dimtime" -> DimTime <$> o .: "column"
"nodimid" -> NoDimId <$> o .: "column"
"tenantid" -> TenantId <$> o .: "column"
"dimid" -> DimId <$> o .: "table" <*> o .: "column"
"dimval" -> DimVal <$> o .: "table" <*> o .: "column"
"factcount" -> FactCount <$> o .:? "sourcecolumn" <*> o .: "column"
"factsum" -> FactSum <$> o .: "sourcecolumn" <*> o .: "column"
"factaverage" -> FactAverage <$> o .: "sourcecolumn" <*> o .: "column"
"factcountdistinct" -> FactCountDistinct <$> o .:? "sourcecolumn" <*> o .: "column"
"factmax" -> FactMax <$> o .: "sourcecolumn" <*> o .: "column"
"factmin" -> FactMin <$> o .: "sourcecolumn" <*> o .: "column"
_ -> fail $ "Invalid fact column type: " ++ cType
parseJSON o = fail $ "Cannot parse fact column: " ++ show o
instance FromJSON Fact where
parseJSON (Object o) = Fact <$> o .: "name"
<*> o .: "tablename"
<*> o .:? "persistent" .!= True
<*> o .:? "parentfacts" .!= []
<*> o .: "columns"
parseJSON o = fail $ "Cannot parse fact: " ++ show o

@ -33,8 +33,8 @@ import qualified Ringo.Validator as V
-- >>> import Text.Show.Pretty
-- >>> :{
--let sessionEventsTable =
-- Table { tableName = "session_events"
-- , tableColumns =
-- Table { tableName = "session_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp without time zone" Null
-- , Column "member_id" "integer" Null
@ -55,14 +55,14 @@ import qualified Ringo.Validator as V
-- , Column "user_agent_version" "character varying(100)" Null
-- , Column "user_agent_device" "character varying(15)" Null
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id" ]
-- , tableConstraints = [ PrimaryKey "id" ]
-- }
-- sessionFact =
-- Fact { factName = "session"
-- , factTableName = "session_events"
-- , factParentNames = []
-- , factColumns =
-- Fact { factName = "session"
-- , factTableName = "session_events"
-- , factTablePersistent = True
-- , factParentNames = []
-- , factColumns =
-- [ DimTime "created_at"
-- , NoDimId "publisher_id"
-- , DimVal "user_agent" "browser_name"
@ -78,8 +78,8 @@ import qualified Ringo.Validator as V
-- , FactCount Nothing "session_count"
-- ]
-- }
-- tables = [sessionEventsTable]
-- facts = [sessionFact]
-- tables = [sessionEventsTable]
-- facts = [sessionFact]
-- typeDefaults = Map.fromList [ ("integer", "-1")
-- , ("timestamp", "'00-00-00 00:00:00'")
-- , ("character", "'__UNKNOWN_VAL__'")
@ -89,8 +89,8 @@ import qualified Ringo.Validator as V
-- , ("numeric", "-1")
-- , ("text", "'__UNKNOWN_VAL__'")
-- ]
-- settings = defSettings { settingTableNameSuffixTemplate = "" }
-- env = Env tables facts settings typeDefaults
-- settings = defSettings { settingTableNameSuffixTemplate = "" }
-- env = Env tables facts settings typeDefaults
-- :}
-- |
@ -169,6 +169,16 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- most_specific_subdivision_name,
-- time_zone);
-- <BLANKLINE>
-- create index on dim_geo (country_name)
-- ;
-- create index on dim_geo (city_name)
-- ;
-- create index on dim_geo (continent_name)
-- ;
-- create index on dim_geo (most_specific_subdivision_name)
-- ;
-- create index on dim_geo (time_zone)
-- ;
-- --------
-- create table dim_user_agent (
-- id serial not null,
@ -188,9 +198,19 @@ extractDependencies env = flip runReader env . E.extractDependencies
-- type,
-- device);
-- <BLANKLINE>
-- create index on dim_user_agent (browser_name)
-- ;
-- create index on dim_user_agent (os)
-- ;
-- create index on dim_user_agent (name)
-- ;
-- create index on dim_user_agent (type)
-- ;
-- create index on dim_user_agent (device)
-- ;
-- --------
dimensionTableDefnSQL :: Env -> Table -> [Text]
dimensionTableDefnSQL env = flip runReader env . G.tableDefnSQL
dimensionTableDefnSQL env = flip runReader env . G.dimensionTableDefnSQL
-- |
--
@ -243,7 +263,7 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- created_at < ?
-- ;
-- <BLANKLINE>
-- insert into dim_user_agent (browser_name, os, name, type, device)
@ -258,7 +278,7 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- created_at < ?
-- ;
-- <BLANKLINE>
-- >>> let sqls = map (dimensionTablePopulateSQL IncrementalPopulation env sessionFact) storySessionDimTableNames
@ -282,9 +302,9 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- where
-- (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null or geo_most_specific_subdivision_name is not null or geo_time_zone is not null)
-- and
-- created_at <= ?
-- created_at < ?
-- and
-- created_at > ?) as x
-- created_at >= ?) as x
-- left outer join
-- dim_geo
-- on dim_geo.country_name = x.geo_country_name
@ -321,9 +341,9 @@ factTableDefnSQL env fact = flip runReader env . G.factTableDefnSQL fact
-- where
-- (browser_name is not null or os is not null or user_agent_name is not null or user_agent_type is not null or user_agent_device is not null)
-- and
-- created_at <= ?
-- created_at < ?
-- and
-- created_at > ?) as x
-- created_at >= ?) as x
-- left outer join
-- dim_user_agent
-- on dim_user_agent.browser_name = x.browser_name

@ -26,21 +26,24 @@ extractFactTable fact = do
tables <- asks envTables
let table = fromJust . findTable (factTableName fact) $ tables
let countColType = settingFactCountColumnType
dimIdColName = settingDimTableIdColumnName
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
let countColType = settingFactCountColumnType
dimIdColName = settingDimTableIdColumnName
sourceColumn cName = fromJust . findColumn cName . tableColumns $ table
notNullSourceColumnCopy cName = (sourceColumn cName) { columnNullable = NotNull }
notNullSourceColumnRename scName cName = (notNullSourceColumnCopy scName) { columnName = cName }
columns = concatFor (factColumns fact) $ \col -> case col of
DimTime cName ->
[ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "bigint" NotNull ]
NoDimId cName -> let
col' = fromJust . findColumn cName . tableColumns $ table
in [ col' { columnNullable = NotNull } ]
NoDimId cName -> [ notNullSourceColumnCopy cName ]
TenantId cName -> [ notNullSourceColumnCopy cName ]
FactCount _ cName -> [ Column cName countColType NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactSum scName cName -> [ notNullSourceColumnRename scName cName ]
FactMax scName cName -> [ notNullSourceColumnRename scName cName ]
FactMin scName cName -> [ notNullSourceColumnRename scName cName ]
FactAverage scName cName ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull
, notNullSourceColumnRename scName (cName <> settingAvgSumColumnSuffix)
]
FactCountDistinct _ cName -> [ Column cName "json" NotNull ]
_ -> []
@ -53,9 +56,10 @@ extractFactTable fact = do
ukColNames =
(++ map columnName fkColumns)
. forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName
_ -> Nothing
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName
TenantId cName -> Just cName
_ -> Nothing
return Table
{ tableName =

@ -1,5 +1,5 @@
module Ringo.Generator
( tableDefnSQL
( dimensionTableDefnSQL
, factTableDefnSQL
, dimensionTablePopulateSQL
, factTablePopulateSQL

@ -1,17 +1,17 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Ringo.Generator.Create (tableDefnSQL, factTableDefnSQL) where
module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative ((<$>))
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
, AlterTableOperation(..), Constraint(..), Cascade(..)
)
, AlterTableOperation(..), Constraint(..), Cascade(..) )
import Data.Maybe (listToMaybe, maybeToList)
import Data.Monoid ((<>))
import Data.Text (Text)
@ -20,13 +20,10 @@ import Ringo.Generator.Sql
import Ringo.Types
import Ringo.Utils
tableDefnSQL :: Table -> Reader Env [Text]
tableDefnSQL table = map ppSQL <$> tableDefnSQL' table
tableDefnSQL' :: Table -> Reader Env [Statement]
tableDefnSQL' Table {..} = do
tableDefnStmts :: Table -> Reader Env [Statement]
tableDefnStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
let tabName = tableName <> settingTableNameSuffixTemplate
tableSQL = CreateTable ea (name tabName) (map columnDefnSQL tableColumns) [] Nothing
@ -48,25 +45,47 @@ tableDefnSQL' Table {..} = do
return $ tableSQL : map constraintDefnSQL tableConstraints
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
factTableDefnSQL fact table = do
ds <- map ppSQL <$> tableDefnSQL' table
is <- map (\st -> ppSQL st <> ";\n") <$> factTableIndexSQL' fact table
tableDefnSQL :: Table -> (Table -> Reader Env [Statement]) -> Reader Env [Text]
tableDefnSQL table indexFn = do
ds <- map ppSQL <$> tableDefnStmts table
is <- map (\st -> ppSQL st <> ";\n") <$> indexFn table
return $ ds ++ is
factTableIndexSQL' :: Fact -> Table -> Reader Env [Statement]
factTableIndexSQL' fact table = do
dimensionTableDefnSQL :: Table -> Reader Env [Text]
dimensionTableDefnSQL table = tableDefnSQL table dimensionTableIndexStmts
dimensionTableIndexStmts :: Table -> Reader Env [Statement]
dimensionTableIndexStmts Table {..} = do
Settings {..} <- asks envSettings
let tabName = tableName <> settingTableNameSuffixTemplate
tablePKColName = head [ cName | PrimaryKey cName <- tableConstraints ]
nonPKColNames = [ cName | Column cName _ _ <- tableColumns, cName /= tablePKColName ]
return [ CreateIndexTSQL ea (nmc "") (name tabName) [nmc cName]
| cName <- nonPKColNames, length nonPKColNames > 1 ]
factTableDefnSQL :: Fact -> Table -> Reader Env [Text]
factTableDefnSQL fact table = tableDefnSQL table (factTableIndexStmts fact)
factTableIndexStmts :: Fact -> Table -> Reader Env [Statement]
factTableIndexStmts fact table = do
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
let factCols = forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
NoDimId cName -> Just cName
_ -> Nothing
let dimTimeCol = head [ cName | DimTime cName <- factColumns fact ]
tenantIdCol = listToMaybe [ cName | TenantId cName <- factColumns fact ]
tabName = tableName table <> settingTableNameSuffixTemplate
dimTimeColName cName = timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
dimCols = [ factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
| (_, Table {..}) <- allDims ]
factCols = forMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just [dimTimeColName cName]
NoDimId cName -> Just [cName]
TenantId cName -> Just [cName]
_ -> Nothing
return [ CreateIndexTSQL ea (nmc "") (name $ tableName table <> settingTableNameSuffixTemplate) [nmc col]
| col <- factCols ++ dimCols ]
dimCols = [ [factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName]
| (_, Table {..}) <- allDims ]
return [ CreateIndexTSQL ea (nmc "") (name $ tabName) (map nmc cols)
| cols <- factCols ++ dimCols ++ [ [cName, dimTimeColName dimTimeCol]
| cName <- maybeToList tenantIdCol ] ]

@ -35,8 +35,8 @@ dimensionTablePopulateSQL' popMode fact dimTableName = do
timeCol = head [ cName | DimTime cName <- factColumns fact ]
isNotNullC = parens . foldBinop "or" . map (postop "isnotnull" . ei . snd) $ colMapping
selectWhereC = Just . foldBinop "and" $
[ isNotNullC, binop "<=" (ei timeCol) placeholder ] ++
[ binop ">" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
[ isNotNullC, binop "<" (ei timeCol) placeholder ] ++
[ binop ">=" (ei timeCol) placeholder | popMode == IncrementalPopulation ]
selectC = makeSelect
{ selDistinct = Distinct
, selSelectList = sl selectCols

@ -134,16 +134,22 @@ factTablePopulateSQL popMode fact = do
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit)
, True
)
dimIdColumnInsertSQL cName =
let sCol = fromJust . findColumn cName $ tableColumns fTable
in (cName, coalesceColumn defaults fTableName sCol, True)
factColMap = concatFor (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName ->
let sCol = fromJust . findColumn cName $ tableColumns fTable
in [ (cName, coalesceColumn defaults fTableName sCol, True) ]
NoDimId cName -> [ dimIdColumnInsertSQL cName ]
TenantId cName -> [ dimIdColumnInsertSQL cName ]
FactCount scName cName ->
[ (cName, "count(" <> maybe "*" (fullColumnName fTableName) scName <> ")", False) ]
FactSum scName cName ->
[ (cName, "sum(" <> fullColumnName fTableName scName <> ")", False) ]
FactMax scName cName ->
[ (cName, "max(" <> fullColumnName fTableName scName <> ")", False) ]
FactMin scName cName ->
[ (cName, "min(" <> fullColumnName fTableName scName <> ")", False) ]
FactAverage scName cName ->
[ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColumnName fTableName scName <> ")"
@ -194,7 +200,7 @@ factTablePopulateSQL popMode fact = do
, ftpsSelectTable = fTableName
, ftpsJoinClauses = joinClauses
, ftpsWhereClauses =
timeCol <> " <= ?" : [ timeCol <> " > ?" | popMode == IncrementalPopulation ]
timeCol <> " < ?" : [ timeCol <> " >= ?" | popMode == IncrementalPopulation ]
, ftpsGroupByCols = map ((groupByColPrefix <>) . fst3) . filter thd3 $ colMap
}

@ -69,31 +69,38 @@ timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
data Fact = Fact
{ factName :: !TableName
, factTableName :: !TableName
, factParentNames :: ![TableName]
, factColumns :: ![FactColumn]
{ factName :: !TableName
, factTableName :: !TableName
, factTablePersistent :: !Bool
, factParentNames :: ![TableName]
, factColumns :: ![FactColumn]
} deriving (Eq, Show)
data FactColumn = DimTime !ColumnName
| NoDimId !ColumnName
| TenantId !ColumnName
| DimId !TableName !ColumnName
| DimVal !TableName !ColumnName
| FactCount !(Maybe ColumnName) !ColumnName
| FactSum !ColumnName !ColumnName
| FactAverage !ColumnName !ColumnName
| FactCountDistinct !(Maybe ColumnName) !ColumnName
| FactMax !ColumnName !ColumnName
| FactMin !ColumnName !ColumnName
deriving (Eq, Show)
factSourceColumnName :: FactColumn -> Maybe ColumnName
factSourceColumnName (DimTime cName) = Just cName
factSourceColumnName (NoDimId cName) = Just cName
factSourceColumnName (TenantId cName) = Just cName
factSourceColumnName (DimId _ cName) = Just cName
factSourceColumnName (DimVal _ cName) = Just cName
factSourceColumnName (FactCount cName _) = cName
factSourceColumnName (FactSum cName _) = Just cName
factSourceColumnName (FactAverage cName _) = Just cName
factSourceColumnName (FactCountDistinct cName _) = cName
factSourceColumnName (FactMax cName _) = Just cName
factSourceColumnName (FactMin cName _) = Just cName
data Settings = Settings
{ settingDimPrefix :: !Text

@ -59,7 +59,9 @@ validateFact Fact {..} = do
, columnNullable (fromJust col) == Null ]
typeDefaultVs =
[ MissingTypeDefault cType
| cName <- [ c | DimVal _ c <- factColumns ] ++ [ c | NoDimId c <- factColumns ]
| cName <- [ c | DimVal _ c <- factColumns ]
++ [ c | NoDimId c <- factColumns ]
++ [ c | TenantId c <- factColumns ]
, let col = findColumn cName (tableColumns table)
, isJust col
, let cType = columnType $ fromJust col

Loading…
Cancel
Save