Adds docs for Ringo.Types module.
parent
11e7d7d3b2
commit
09f4a49779
|
@ -34,6 +34,9 @@ settingsParser = let Settings {..} = defSettings
|
||||||
<> value (Text.unpack settingFactPrefix)
|
<> value (Text.unpack settingFactPrefix)
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> help "Prefix for fact tables"))
|
<> help "Prefix for fact tables"))
|
||||||
|
<*> minorOption "fact-infix"
|
||||||
|
settingFactInfix
|
||||||
|
"Infix for fact tables"
|
||||||
<*> option auto (let timeunits = map show [Second ..]
|
<*> option auto (let timeunits = map show [Second ..]
|
||||||
in long "timeunit"
|
in long "timeunit"
|
||||||
<> short 't'
|
<> short 't'
|
||||||
|
@ -62,9 +65,6 @@ settingsParser = let Settings {..} = defSettings
|
||||||
<> value settingFactCountDistinctErrorRate
|
<> value settingFactCountDistinctErrorRate
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> help "Error rate for count distinct calulations")
|
<> help "Error rate for count distinct calulations")
|
||||||
<*> minorOption "fact-infix"
|
|
||||||
settingFactInfix
|
|
||||||
"Infix for fact tables"
|
|
||||||
<*> minorOption "dependencies-json-file"
|
<*> minorOption "dependencies-json-file"
|
||||||
settingDependenciesJSONFileName
|
settingDependenciesJSONFileName
|
||||||
"Name of the output dependencies json file"
|
"Name of the output dependencies json file"
|
||||||
|
|
|
@ -5,6 +5,7 @@ module Ringo
|
||||||
|
|
||||||
-- $setup
|
-- $setup
|
||||||
module Ringo.Types
|
module Ringo.Types
|
||||||
|
, makeEnv
|
||||||
, extractFactTable
|
, extractFactTable
|
||||||
, extractDimensionTables
|
, extractDimensionTables
|
||||||
, extractDependencies
|
, extractDependencies
|
||||||
|
@ -12,7 +13,6 @@ module Ringo
|
||||||
, factTableDefnSQL
|
, factTableDefnSQL
|
||||||
, dimensionTablePopulateSQL
|
, dimensionTablePopulateSQL
|
||||||
, factTablePopulateSQL
|
, factTablePopulateSQL
|
||||||
, makeEnv
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Reader (runReader)
|
import Control.Monad.Reader (runReader)
|
||||||
|
|
|
@ -10,7 +10,7 @@ module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
|
||||||
import Control.Applicative ((<$>))
|
import Control.Applicative ((<$>))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
import Control.Monad.Reader (Reader, asks, withReader)
|
import Control.Monad.Reader (Reader, asks)
|
||||||
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
|
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
|
||||||
, AlterTableOperation(..), Constraint(..), Cascade(..)
|
, AlterTableOperation(..), Constraint(..), Cascade(..)
|
||||||
, Replace(..) )
|
, Replace(..) )
|
||||||
|
|
|
@ -7,11 +7,10 @@ import qualified Data.Map as Map
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
import Database.HsSqlPpp.Syntax (ScalarExpr)
|
import Database.HsSqlPpp.Syntax (ScalarExpr)
|
||||||
import Data.List (find, nub)
|
import Data.List (find)
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
|
||||||
import Ringo.Extractor.Internal
|
|
||||||
import Ringo.Generator.Sql
|
import Ringo.Generator.Sql
|
||||||
import Ringo.Types
|
import Ringo.Types
|
||||||
|
|
||||||
|
|
|
@ -3,30 +3,30 @@
|
||||||
{-# LANGUAGE Rank2Types #-}
|
{-# LANGUAGE Rank2Types #-}
|
||||||
|
|
||||||
module Ringo.Types
|
module Ringo.Types
|
||||||
( ColumnName
|
( Table(..)
|
||||||
, ColumnType
|
|
||||||
, TableName
|
, TableName
|
||||||
, Nullable(..)
|
|
||||||
, Column(..)
|
|
||||||
, TableConstraint(..)
|
, TableConstraint(..)
|
||||||
, Table(..)
|
, Column(..)
|
||||||
|
, ColumnName
|
||||||
|
, ColumnType
|
||||||
|
, Nullable(..)
|
||||||
|
, Fact(..)
|
||||||
|
, FactColumn(..)
|
||||||
|
, FactColumnType(..)
|
||||||
|
, FactColumnKind(..)
|
||||||
|
, factSourceColumnName
|
||||||
, TimeUnit(..)
|
, TimeUnit(..)
|
||||||
, timeUnitName
|
, timeUnitName
|
||||||
, timeUnitToSeconds
|
, timeUnitToSeconds
|
||||||
, Fact(..)
|
|
||||||
, FactColumnKind(..)
|
|
||||||
, FactColumnType(..)
|
|
||||||
, FactColumn(..)
|
|
||||||
, factSourceColumnName
|
|
||||||
, Settings(..)
|
|
||||||
, defSettings
|
|
||||||
, ValidationError(..)
|
|
||||||
, TypeDefaults
|
|
||||||
, Env
|
, Env
|
||||||
, envTables
|
, envTables
|
||||||
, envFacts
|
, envFacts
|
||||||
, envSettings
|
, envSettings
|
||||||
, envTypeDefaults
|
, envTypeDefaults
|
||||||
|
, Settings(..)
|
||||||
|
, defSettings
|
||||||
|
, TypeDefaults
|
||||||
|
, ValidationError(..)
|
||||||
, TablePopulationMode(..)
|
, TablePopulationMode(..)
|
||||||
, Dependencies
|
, Dependencies
|
||||||
) where
|
) where
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE RecordWildCards #-}
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
{-# LANGUAGE GADTs #-}
|
{-# LANGUAGE GADTs #-}
|
||||||
|
{-# LANGUAGE CPP #-}
|
||||||
{-# LANGUAGE Rank2Types #-}
|
{-# LANGUAGE Rank2Types #-}
|
||||||
{-# LANGUAGE StandaloneDeriving #-}
|
{-# LANGUAGE StandaloneDeriving #-}
|
||||||
{-# LANGUAGE DataKinds #-}
|
{-# LANGUAGE DataKinds #-}
|
||||||
|
@ -17,20 +18,30 @@ import Data.Text (Text)
|
||||||
showColNames :: [Text] -> String
|
showColNames :: [Text] -> String
|
||||||
showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")"
|
showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")"
|
||||||
|
|
||||||
|
-- | Name of a 'Column'
|
||||||
type ColumnName = Text
|
type ColumnName = Text
|
||||||
|
|
||||||
|
-- | Type of a 'Column'
|
||||||
type ColumnType = Text
|
type ColumnType = Text
|
||||||
|
|
||||||
|
-- | Name of a 'Table'
|
||||||
type TableName = Text
|
type TableName = Text
|
||||||
|
|
||||||
data Nullable = Null | NotNull deriving (Eq, Enum)
|
-- | Nullness of a 'Column'
|
||||||
|
data Nullable =
|
||||||
|
Null -- ^ the column is nullable
|
||||||
|
| NotNull -- ^ the column is not nullable
|
||||||
|
deriving (Eq, Enum)
|
||||||
|
|
||||||
instance Show Nullable where
|
instance Show Nullable where
|
||||||
show Null = "NULL"
|
show Null = "NULL"
|
||||||
show NotNull = "NOT NULL"
|
show NotNull = "NOT NULL"
|
||||||
|
|
||||||
|
-- | A column of a 'Table'
|
||||||
data Column = Column
|
data Column = Column
|
||||||
{ columnName :: !ColumnName
|
{ columnName :: !ColumnName -- ^ Name of the column
|
||||||
, columnType :: !ColumnType
|
, columnType :: !ColumnType -- ^ Type of the column
|
||||||
, columnNullable :: !Nullable
|
, columnNullable :: !Nullable -- ^ Nullness of the column
|
||||||
} deriving (Eq)
|
} deriving (Eq)
|
||||||
|
|
||||||
instance Show Column where
|
instance Show Column where
|
||||||
|
@ -39,70 +50,270 @@ instance Show Column where
|
||||||
++ Text.unpack columnType ++ " "
|
++ Text.unpack columnType ++ " "
|
||||||
++ show columnNullable
|
++ show columnNullable
|
||||||
|
|
||||||
data TableConstraint = PrimaryKey !ColumnName
|
-- | A constraint on a 'Table'
|
||||||
| UniqueKey ![ColumnName]
|
data TableConstraint =
|
||||||
| ForeignKey !TableName ![(ColumnName, ColumnName)]
|
-- | A primary key constraint
|
||||||
deriving (Eq)
|
PrimaryKey
|
||||||
|
{ tableConstrPrimaryKeyColumn :: !ColumnName -- ^ Name of the primary key column
|
||||||
|
}
|
||||||
|
-- | A unique key contraint
|
||||||
|
| UniqueKey
|
||||||
|
{ tableConstrUniqueKeyColumns :: ![ColumnName] -- ^ Name of the unique key columns
|
||||||
|
}
|
||||||
|
-- | A foreign key constraint
|
||||||
|
| ForeignKey
|
||||||
|
{ -- | Name of the table referenced by the foreign key
|
||||||
|
tableConstrForeignKeyTable :: !TableName
|
||||||
|
-- | Mapping of the columns as an associative list for the foreign key.
|
||||||
|
-- keys: this table's column names, values: referenced table's column names.
|
||||||
|
, tableConstrForeignKeyColumnMapping :: ![(ColumnName, ColumnName)]
|
||||||
|
}
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show TableConstraint where
|
instance Show TableConstraint where
|
||||||
show (PrimaryKey col) = "PrimaryKey " ++ Text.unpack col
|
show (PrimaryKey col) = "PrimaryKey " ++ Text.unpack col
|
||||||
show (UniqueKey cols) = "UniqueKey " ++ showColNames cols
|
show (UniqueKey cols) = "UniqueKey " ++ showColNames cols
|
||||||
show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " "
|
show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " "
|
||||||
++ Text.unpack tName ++ " " ++ showColNames (map snd colMap)
|
++ Text.unpack tName ++ " " ++ showColNames (map snd colMap)
|
||||||
|
|
||||||
|
-- | A table representing a physical table in the database
|
||||||
|
--
|
||||||
|
-- The following example represents a set of tables from a multi-publisher blog system:
|
||||||
|
--
|
||||||
|
-- >>> :set -XOverloadedStrings
|
||||||
|
-- >>> :{
|
||||||
|
-- let publishersTable =
|
||||||
|
-- Table { tableName = "publishers"
|
||||||
|
-- , tableColumns =
|
||||||
|
-- [ Column "id" "integer" NotNull
|
||||||
|
-- , Column "name" "varchar(100)" NotNull
|
||||||
|
-- ]
|
||||||
|
-- , tableConstraints =
|
||||||
|
-- [ PrimaryKey "id"
|
||||||
|
-- , UniqueKey [ "name" ]
|
||||||
|
-- ]
|
||||||
|
-- }
|
||||||
|
-- usersTable =
|
||||||
|
-- Table { tableName = "users"
|
||||||
|
-- , tableColumns =
|
||||||
|
-- [ Column "id" "uuid" NotNull
|
||||||
|
-- , Column "created_at" "timestamp" NotNull
|
||||||
|
-- , Column "pub_id" "integer" NotNull
|
||||||
|
-- , Column "username" "varchar(100)" NotNull
|
||||||
|
-- , Column "email" "varchar(500)" Null
|
||||||
|
-- ]
|
||||||
|
-- , tableConstraints =
|
||||||
|
-- [ PrimaryKey "id"
|
||||||
|
-- , ForeignKey "publishers" [ ("pub_id", "id") ]
|
||||||
|
-- , UniqueKey [ "pub_id", "username" ]
|
||||||
|
-- ]
|
||||||
|
-- }
|
||||||
|
-- -- This table records the time spent by each user on a post
|
||||||
|
-- postViewEventsTable =
|
||||||
|
-- Table { tableName = "post_view_events"
|
||||||
|
-- , tableColumns =
|
||||||
|
-- [ Column "id" "uuid" NotNull
|
||||||
|
-- , Column "created_at" "timestamp" NotNull
|
||||||
|
-- , Column "user_id" "uuid" NotNull
|
||||||
|
-- , Column "pub_id" "integer" NotNull
|
||||||
|
-- , Column "post_id" "uuid" NotNull
|
||||||
|
-- , Column "geo_city" "varchar(100)" Null
|
||||||
|
-- , Column "geo_country" "varchar(100)" Null
|
||||||
|
-- , Column "device_version" "varchar(25)" Null
|
||||||
|
-- , Column "device_name" "varchar(100)" Null
|
||||||
|
-- , Column "device_type" "varchar(50)" Null
|
||||||
|
-- , Column "time_spent" "integer" NotNull
|
||||||
|
-- ]
|
||||||
|
-- , tableConstraints =
|
||||||
|
-- [ PrimaryKey "id"
|
||||||
|
-- , ForeignKey "users" [ ("user_id", "id") ]
|
||||||
|
-- , ForeignKey "publishers" [ ("pub_id", "id") ]
|
||||||
|
-- ]
|
||||||
|
-- }
|
||||||
|
-- :}
|
||||||
data Table = Table
|
data Table = Table
|
||||||
{ tableName :: !TableName
|
{ tableName :: !TableName -- ^ Name of the table
|
||||||
, tableColumns :: ![Column]
|
, tableColumns :: ![Column] -- ^ A list of the columns in the table
|
||||||
, tableConstraints :: ![TableConstraint]
|
, tableConstraints :: ![TableConstraint] -- ^ A list of the constraints on the table
|
||||||
} deriving (Eq)
|
} deriving (Eq)
|
||||||
|
|
||||||
instance Show Table where
|
instance Show Table where
|
||||||
show Table {..} =
|
show Table {..} =
|
||||||
unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints
|
unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints
|
||||||
|
|
||||||
data TimeUnit = Second | Minute | Hour | Day | Week
|
-- | Type of 'FactColumnType'.
|
||||||
deriving (Eq, Enum, Show, Read)
|
data FactColumnKind =
|
||||||
|
FCKNone -- ^ Type of a FactColumnType without any parameters
|
||||||
timeUnitName :: TimeUnit -> Text
|
| FCKTargetTable -- ^ Type of a FactColumnType with 'factColTargetTable' as the only parameter.
|
||||||
timeUnitName = Text.toLower . Text.pack . show
|
| FCKMaybeSourceColumn -- ^ Type of a FactColumnType with 'factColMaybeSourceColumn' as the only parameter.
|
||||||
|
| FCKSourceColumn -- ^ Type of a FactColumnType with 'factColSourceColumn' as the only parameter.
|
||||||
timeUnitToSeconds :: TimeUnit -> Int
|
|
||||||
timeUnitToSeconds Second = 1
|
|
||||||
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
|
|
||||||
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
|
|
||||||
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
|
|
||||||
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
|
|
||||||
|
|
||||||
data Fact = Fact
|
|
||||||
{ factName :: !TableName
|
|
||||||
, factTableName :: !TableName
|
|
||||||
, factTablePersistent :: !Bool
|
|
||||||
, factParentNames :: ![TableName]
|
|
||||||
, factColumns :: ![FactColumn]
|
|
||||||
} deriving (Show)
|
|
||||||
|
|
||||||
data FactColumnKind = FCKNone | FCKTargetTable | FCKMaybeSourceColumn | FCKSourceColumn
|
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | Type of a fact column.
|
||||||
|
#else
|
||||||
|
-- | Type of a fact column.
|
||||||
|
--
|
||||||
|
-- 'DimTime':
|
||||||
|
-- A fact column which contains a time dimension data (e.g. `created_at`). This is not exatracted
|
||||||
|
-- as a dimension table and instead just converted to an int which depends on 'settingTimeUnit'.
|
||||||
|
-- Every fact must have one of this.
|
||||||
|
--
|
||||||
|
-- 'NoDimId':
|
||||||
|
-- A fact column which contains an id of a dimension which does not need to extracted as a table
|
||||||
|
-- and does not exist already.
|
||||||
|
--
|
||||||
|
-- 'TenantId':
|
||||||
|
-- A fact column which constains an id of a tenant in a multi-tenant database (e.g. `organization_id`).
|
||||||
|
-- This is not extracted as a dimension table.
|
||||||
|
--
|
||||||
|
-- 'DimId':
|
||||||
|
-- A fact column which constains an id of a dimension which does not need to extracted as a table
|
||||||
|
-- and exists already.
|
||||||
|
--
|
||||||
|
-- 'DimVal':
|
||||||
|
-- A fact column which constains a value which needs to be extracted to a dimension table.
|
||||||
|
-- Multiple DimVal fact columns can be extracted to the same dimension table.
|
||||||
|
--
|
||||||
|
-- 'FactCount':
|
||||||
|
-- A fact column which will contain the count of the rows (@count(*)@) or count of a source column
|
||||||
|
-- if provided.
|
||||||
|
--
|
||||||
|
-- 'FactCountDistinct':
|
||||||
|
-- A fact column which will contain the count of the unique values of a source column if provided
|
||||||
|
-- or the primary key of the table.
|
||||||
|
--
|
||||||
|
-- 'FactSum':
|
||||||
|
-- A fact column which will contain the sum of the values of the provided source column.
|
||||||
|
--
|
||||||
|
-- 'FactAverage':
|
||||||
|
-- A fact column which will contain the average of the values of the provided source column.
|
||||||
|
--
|
||||||
|
-- 'FactMax':
|
||||||
|
-- A fact column which will contain the maximum of the values of the provided source column.
|
||||||
|
--
|
||||||
|
-- 'FactMin':
|
||||||
|
-- A fact column which will contain the minimum of the values of the provided source column.
|
||||||
|
#endif
|
||||||
data FactColumnType (a :: FactColumnKind) where
|
data FactColumnType (a :: FactColumnKind) where
|
||||||
DimTime :: FactColumnType 'FCKNone
|
|
||||||
NoDimId :: FactColumnType 'FCKNone
|
#if MIN_VERSION_base(4,9,0)
|
||||||
TenantId :: FactColumnType 'FCKNone
|
-- | A fact column which contains a time dimension data (e.g. `created_at`). This is not exatracted
|
||||||
DimId :: { factColTargetTable :: !TableName } -> FactColumnType 'FCKTargetTable
|
-- as a dimension table and instead just converted to an int which depends on 'settingTimeUnit'.
|
||||||
DimVal :: { factColTargetTable :: !TableName } -> FactColumnType 'FCKTargetTable
|
-- Every fact must have one of this.
|
||||||
FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType 'FCKMaybeSourceColumn
|
#endif
|
||||||
|
DimTime :: FactColumnType 'FCKNone
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which contains an id of a dimension which does not need to extracted as a table
|
||||||
|
-- and does not exist already.
|
||||||
|
#endif
|
||||||
|
NoDimId :: FactColumnType 'FCKNone
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which constains an id of a tenant in a multi-tenant database (e.g. `organization_id`).
|
||||||
|
-- This is not extracted as a dimension table.
|
||||||
|
#endif
|
||||||
|
TenantId :: FactColumnType 'FCKNone
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which constains an id of a dimension which does not need to extracted as a table
|
||||||
|
-- and exists already.
|
||||||
|
#endif
|
||||||
|
DimId :: { factColTargetTable :: !TableName -- ^ Name of the target dimension table
|
||||||
|
} -> FactColumnType 'FCKTargetTable
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which constains a value which needs to be extracted to a dimension table.
|
||||||
|
-- Multiple DimVal fact columns can be extracted to the same dimension table.
|
||||||
|
#endif
|
||||||
|
DimVal :: { factColTargetTable :: !TableName } -> FactColumnType 'FCKTargetTable
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which will contain the count of the rows (@count(*)@) or count of a source column
|
||||||
|
-- if provided.
|
||||||
|
#endif
|
||||||
|
FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) -- ^ Name of the optional source column
|
||||||
|
} -> FactColumnType 'FCKMaybeSourceColumn
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which will contain the count of the unique values of a source column if provided
|
||||||
|
-- or the primary key of the table.
|
||||||
|
#endif
|
||||||
FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType 'FCKMaybeSourceColumn
|
FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType 'FCKMaybeSourceColumn
|
||||||
FactSum :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
|
||||||
FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
#if MIN_VERSION_base(4,9,0)
|
||||||
FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
-- | A fact column which will contain the sum of the values of the provided source column.
|
||||||
FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
#endif
|
||||||
|
FactSum :: { factColSourceColumn :: !ColumnName -- ^ Name of the source column
|
||||||
|
} -> FactColumnType 'FCKSourceColumn
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which will contain the average of the values of the provided source column.
|
||||||
|
#endif
|
||||||
|
FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which will contain the maximum of the values of the provided source column.
|
||||||
|
#endif
|
||||||
|
FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
||||||
|
|
||||||
|
#if MIN_VERSION_base(4,9,0)
|
||||||
|
-- | A fact column which will contain the minimum of the values of the provided source column.
|
||||||
|
#endif
|
||||||
|
FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
|
||||||
|
|
||||||
deriving instance Show (FactColumnType a)
|
deriving instance Show (FactColumnType a)
|
||||||
|
|
||||||
|
-- | A column in a fact table
|
||||||
data FactColumn = forall a. FactColumn
|
data FactColumn = forall a. FactColumn
|
||||||
{ factColTargetColumn :: !ColumnName
|
{ -- | Name of the fact column in the generated table
|
||||||
|
factColTargetColumn :: !ColumnName
|
||||||
|
-- | Type of the fact column
|
||||||
, factColType :: FactColumnType a }
|
, factColType :: FactColumnType a }
|
||||||
|
|
||||||
deriving instance Show FactColumn
|
deriving instance Show FactColumn
|
||||||
|
|
||||||
|
-- | A fact is a table that records measurements or metrics for a specific event
|
||||||
|
--
|
||||||
|
-- The following represent a set of facts for the same multi-publisher blog system:
|
||||||
|
--
|
||||||
|
-- >>> :{
|
||||||
|
-- let postFact =
|
||||||
|
-- Fact { factName = "post_views"
|
||||||
|
-- , factTableName = "post_view_events"
|
||||||
|
-- , factTablePersistent = True
|
||||||
|
-- , factParentNames = []
|
||||||
|
-- , factColumns =
|
||||||
|
-- [ FactColumn "created_at" $ DimTime
|
||||||
|
-- , FactColumn "publisher_id" $ TenantId
|
||||||
|
-- , FactColumn "user_id" $ DimId "users"
|
||||||
|
-- , FactColumn "post_id" $ NoDimId
|
||||||
|
-- , FactColumn "geo_city" $ DimVal "geo"
|
||||||
|
-- , FactColumn "geo_country" $ DimVal "geo"
|
||||||
|
-- , FactColumn "device_name" $ DimVal "device"
|
||||||
|
-- , FactColumn "device_type" $ DimVal "device"
|
||||||
|
-- , FactColumn "count" $ FactCount Nothing
|
||||||
|
-- , FactColumn "unq_device_count" $ FactCountDistinct $ Just "device_name"
|
||||||
|
-- , FactColumn "time_spent" $ FactSum "time_spent"
|
||||||
|
-- , FactColumn "max_time_spent" $ FactMax "time_spent"
|
||||||
|
-- ]
|
||||||
|
-- }
|
||||||
|
-- :}
|
||||||
|
data Fact = Fact
|
||||||
|
{ -- | Name of the fact
|
||||||
|
factName :: !TableName
|
||||||
|
-- | Name of the table from which the fact is derived
|
||||||
|
, factTableName :: !TableName
|
||||||
|
-- | If true, the generated fact table is actually created; if false, the generated
|
||||||
|
-- fact table is just used for intermidiate computations and is not actually created
|
||||||
|
, factTablePersistent :: !Bool
|
||||||
|
-- | Names of the parent facts
|
||||||
|
, factParentNames :: ![TableName]
|
||||||
|
-- | A list of fact columns in the fact
|
||||||
|
, factColumns :: ![FactColumn]
|
||||||
|
} deriving (Show)
|
||||||
|
|
||||||
|
-- | Returns the name of the source column of a fact column
|
||||||
factSourceColumnName :: FactColumn -> Maybe ColumnName
|
factSourceColumnName :: FactColumn -> Maybe ColumnName
|
||||||
factSourceColumnName FactColumn {..} = case factColType of
|
factSourceColumnName FactColumn {..} = case factColType of
|
||||||
DimTime -> Just factColTargetColumn
|
DimTime -> Just factColTargetColumn
|
||||||
|
@ -117,24 +328,58 @@ factSourceColumnName FactColumn {..} = case factColType of
|
||||||
FactMax {..} -> Just factColSourceColumn
|
FactMax {..} -> Just factColSourceColumn
|
||||||
FactMin {..} -> Just factColSourceColumn
|
FactMin {..} -> Just factColSourceColumn
|
||||||
|
|
||||||
|
-- | Units of time
|
||||||
|
data TimeUnit = Second | Minute | Hour | Day | Week
|
||||||
|
deriving (Eq, Enum, Show, Read)
|
||||||
|
|
||||||
|
-- | Returns the name of a time unit
|
||||||
|
timeUnitName :: TimeUnit -> Text
|
||||||
|
timeUnitName = Text.toLower . Text.pack . show
|
||||||
|
|
||||||
|
-- | Returns the number of seconds in a time unit
|
||||||
|
timeUnitToSeconds :: TimeUnit -> Int
|
||||||
|
timeUnitToSeconds Second = 1
|
||||||
|
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
|
||||||
|
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
|
||||||
|
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
|
||||||
|
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
|
||||||
|
|
||||||
|
-- | Global settings for the library
|
||||||
data Settings = Settings
|
data Settings = Settings
|
||||||
{ settingDimPrefix :: !Text
|
{ -- | Prefix for the names of the generated dimension tables
|
||||||
|
settingDimPrefix :: !Text
|
||||||
|
-- | Prefix for the names of the generated fact tables
|
||||||
, settingFactPrefix :: !Text
|
, settingFactPrefix :: !Text
|
||||||
, settingTimeUnit :: !TimeUnit
|
-- | Infix for the names of the generated fact tables
|
||||||
, settingAvgCountColumnSuffix :: !Text
|
|
||||||
, settingAvgSumColumnSuffix :: !Text
|
|
||||||
, settingDimTableIdColumnName :: !Text
|
|
||||||
, settingDimTableIdColumnType :: !Text
|
|
||||||
, settingFactCountColumnType :: !Text
|
|
||||||
, settingFactCountDistinctErrorRate :: !Double
|
|
||||||
, settingFactInfix :: !Text
|
, settingFactInfix :: !Text
|
||||||
|
-- | Time unit used to summarize the fact table data
|
||||||
|
, settingTimeUnit :: !TimeUnit
|
||||||
|
-- | Suffix for the names of the generated average-count fact columns
|
||||||
|
, settingAvgCountColumnSuffix :: !Text
|
||||||
|
-- | Suffix for the names of the generated average-sum fact columns
|
||||||
|
, settingAvgSumColumnSuffix :: !Text
|
||||||
|
-- | Name of the id columns of the generated dimension tables
|
||||||
|
, settingDimTableIdColumnName :: !Text
|
||||||
|
-- | Type of the id columns of the generated dimension tables
|
||||||
|
, settingDimTableIdColumnType :: !Text
|
||||||
|
-- | Type of the count fact columns of the generated dimension tables
|
||||||
|
, settingFactCountColumnType :: !Text
|
||||||
|
-- | Maximum error rate for the hyperloglog algorithm for computing
|
||||||
|
-- count distinct fact columns of the generated dimension tables
|
||||||
|
, settingFactCountDistinctErrorRate :: !Double
|
||||||
|
-- | Name of the generated JSON file containing the dependency graph
|
||||||
, settingDependenciesJSONFileName :: !Text
|
, settingDependenciesJSONFileName :: !Text
|
||||||
|
-- | Name of the generated JSON file containing the list of name of the generated fact tables
|
||||||
, settingFactsJSONFileName :: !Text
|
, settingFactsJSONFileName :: !Text
|
||||||
|
-- | Name of the generated JSON file containing the list of name of the generated dimension tables
|
||||||
, settingDimensionJSONFileName :: !Text
|
, settingDimensionJSONFileName :: !Text
|
||||||
|
-- | Value to coalesce the missing foreign key id column values to in the generated fact tables
|
||||||
, settingForeignKeyIdCoalesceValue :: !Int
|
, settingForeignKeyIdCoalesceValue :: !Int
|
||||||
|
-- | Suffix template for names of all the generated tables
|
||||||
, settingTableNameSuffixTemplate :: !Text
|
, settingTableNameSuffixTemplate :: !Text
|
||||||
} deriving (Eq, Show)
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
-- | Settings with default values
|
||||||
defSettings :: Settings
|
defSettings :: Settings
|
||||||
defSettings = Settings
|
defSettings = Settings
|
||||||
{ settingDimPrefix = "dim_"
|
{ settingDimPrefix = "dim_"
|
||||||
|
@ -154,20 +399,35 @@ defSettings = Settings
|
||||||
, settingTableNameSuffixTemplate = "{{suff}}"
|
, settingTableNameSuffixTemplate = "{{suff}}"
|
||||||
}
|
}
|
||||||
|
|
||||||
data ValidationError = MissingTable !TableName
|
-- | Errors possible while validating the environment
|
||||||
| DuplicateTable !TableName
|
data ValidationError =
|
||||||
| MissingFact !TableName
|
-- | When referencing a table which is missing from the env
|
||||||
| DuplicateFact !TableName
|
MissingTable !TableName
|
||||||
| MissingColumn !TableName !ColumnName
|
-- | When referencing a fact which is missing from the env
|
||||||
| DuplicateColumn !TableName !ColumnName
|
| MissingFact !TableName
|
||||||
| MissingTimeColumn !TableName
|
-- | When referencing a column which is missing from the env
|
||||||
| DuplicateDimension !TableName
|
| MissingColumn !TableName !ColumnName
|
||||||
| MissingNotNullConstraint !TableName !ColumnName
|
-- | When referencing a fact which is missing from the env
|
||||||
| MissingTypeDefault !Text
|
| MissingTimeColumn !TableName
|
||||||
deriving (Eq, Show)
|
-- | When a 'DimTime' fact column of a fact is 'Null'
|
||||||
|
| MissingNotNullConstraint !TableName !ColumnName
|
||||||
|
-- | When the default value of a type is missing from the env
|
||||||
|
| MissingTypeDefault !Text
|
||||||
|
-- | When there are multiple tables with the same name in the env
|
||||||
|
| DuplicateTable !TableName
|
||||||
|
-- | When there are multiple facts with the same name in the env
|
||||||
|
| DuplicateFact !TableName
|
||||||
|
-- | When there are multiple columns with the same name in a table in the env
|
||||||
|
| DuplicateColumn !TableName !ColumnName
|
||||||
|
-- | When there are multiple dimensions with the same name in the env
|
||||||
|
| DuplicateDimension !TableName
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
-- | A mapping of SQL types to their default values used to coleasce null column values in
|
||||||
|
-- the generated dimension and fact tables
|
||||||
type TypeDefaults = Map Text Text
|
type TypeDefaults = Map Text Text
|
||||||
|
|
||||||
|
-- | The environment for the library to compute in
|
||||||
data Env = Env
|
data Env = Env
|
||||||
{ _envTables :: ![Table]
|
{ _envTables :: ![Table]
|
||||||
, _envFacts :: ![Fact]
|
, _envFacts :: ![Fact]
|
||||||
|
@ -175,18 +435,26 @@ data Env = Env
|
||||||
, _envTypeDefaults :: !TypeDefaults
|
, _envTypeDefaults :: !TypeDefaults
|
||||||
} deriving (Show)
|
} deriving (Show)
|
||||||
|
|
||||||
|
-- | Return the list of source tables from the env
|
||||||
envTables :: Env -> [Table]
|
envTables :: Env -> [Table]
|
||||||
envTables = _envTables
|
envTables = _envTables
|
||||||
|
|
||||||
|
-- | Return the list of facts to be generated from the env
|
||||||
envFacts :: Env -> [Fact]
|
envFacts :: Env -> [Fact]
|
||||||
envFacts = _envFacts
|
envFacts = _envFacts
|
||||||
|
|
||||||
|
-- | Return the settings from the env
|
||||||
envSettings :: Env -> Settings
|
envSettings :: Env -> Settings
|
||||||
envSettings = _envSettings
|
envSettings = _envSettings
|
||||||
|
|
||||||
|
-- | Return the defaults for the SQL types from the env
|
||||||
envTypeDefaults :: Env -> TypeDefaults
|
envTypeDefaults :: Env -> TypeDefaults
|
||||||
envTypeDefaults = _envTypeDefaults
|
envTypeDefaults = _envTypeDefaults
|
||||||
|
|
||||||
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)
|
-- | The mode for population of the generated tables; used to switch the SQL for table population
|
||||||
|
data TablePopulationMode = FullPopulation -- ^ Populating the tables fully, starting with empty ones
|
||||||
|
| IncrementalPopulation -- ^ Populating the tables incrementally
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
-- | The dependency graph of the generated tables in the order to be populated
|
||||||
type Dependencies = Map TableName [TableName]
|
type Dependencies = Map TableName [TableName]
|
||||||
|
|
|
@ -63,7 +63,6 @@ validateFact Fact {..} = do
|
||||||
, let col = findColumn cName (tableColumns table)
|
, let col = findColumn cName (tableColumns table)
|
||||||
, isJust col
|
, isJust col
|
||||||
, columnNullable (fromJust col) == Null ]
|
, columnNullable (fromJust col) == Null ]
|
||||||
|
|
||||||
typeDefaultVs =
|
typeDefaultVs =
|
||||||
[ MissingTypeDefault cType
|
[ MissingTypeDefault cType
|
||||||
| cName <- [ c | FactColumn c DimVal {..} <- factColumns ]
|
| cName <- [ c | FactColumn c DimVal {..} <- factColumns ]
|
||||||
|
|
|
@ -1,2 +1,4 @@
|
||||||
import Test.DocTest
|
import Test.DocTest
|
||||||
main = doctest ["-isrc", "Ringo"]
|
main = do
|
||||||
|
doctest ["-isrc", "Ringo"]
|
||||||
|
doctest ["-isrc", "Ringo.Types.Internal"]
|
||||||
|
|
Loading…
Reference in New Issue