Adds docs for Ringo.Types module.

pull/1/head
Abhinav Sarkar 2016-07-05 21:00:20 +05:30
parent 11e7d7d3b2
commit 09f4a49779
No known key found for this signature in database
GPG Key ID: 7C9166A6F5465AD5
8 changed files with 355 additions and 87 deletions

View File

@ -34,6 +34,9 @@ settingsParser = let Settings {..} = defSettings
<> value (Text.unpack settingFactPrefix)
<> showDefault
<> help "Prefix for fact tables"))
<*> minorOption "fact-infix"
settingFactInfix
"Infix for fact tables"
<*> option auto (let timeunits = map show [Second ..]
in long "timeunit"
<> short 't'
@ -62,9 +65,6 @@ settingsParser = let Settings {..} = defSettings
<> value settingFactCountDistinctErrorRate
<> showDefault
<> help "Error rate for count distinct calulations")
<*> minorOption "fact-infix"
settingFactInfix
"Infix for fact tables"
<*> minorOption "dependencies-json-file"
settingDependenciesJSONFileName
"Name of the output dependencies json file"

View File

@ -5,6 +5,7 @@ module Ringo
-- $setup
module Ringo.Types
, makeEnv
, extractFactTable
, extractDimensionTables
, extractDependencies
@ -12,7 +13,6 @@ module Ringo
, factTableDefnSQL
, dimensionTablePopulateSQL
, factTablePopulateSQL
, makeEnv
) where
import Control.Monad.Reader (runReader)

View File

@ -10,7 +10,7 @@ module Ringo.Generator.Create (dimensionTableDefnSQL, factTableDefnSQL) where
import Control.Applicative ((<$>))
#endif
import Control.Monad.Reader (Reader, asks, withReader)
import Control.Monad.Reader (Reader, asks)
import Database.HsSqlPpp.Syntax ( Statement(..), RowConstraint(..), AlterTableAction(..)
, AlterTableOperation(..), Constraint(..), Cascade(..)
, Replace(..) )

View File

@ -7,11 +7,10 @@ import qualified Data.Map as Map
import qualified Data.Text as Text
import Database.HsSqlPpp.Syntax (ScalarExpr)
import Data.List (find, nub)
import Data.List (find)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Generator.Sql
import Ringo.Types

View File

@ -3,30 +3,30 @@
{-# LANGUAGE Rank2Types #-}
module Ringo.Types
( ColumnName
, ColumnType
( Table(..)
, TableName
, Nullable(..)
, Column(..)
, TableConstraint(..)
, Table(..)
, Column(..)
, ColumnName
, ColumnType
, Nullable(..)
, Fact(..)
, FactColumn(..)
, FactColumnType(..)
, FactColumnKind(..)
, factSourceColumnName
, TimeUnit(..)
, timeUnitName
, timeUnitToSeconds
, Fact(..)
, FactColumnKind(..)
, FactColumnType(..)
, FactColumn(..)
, factSourceColumnName
, Settings(..)
, defSettings
, ValidationError(..)
, TypeDefaults
, Env
, envTables
, envFacts
, envSettings
, envTypeDefaults
, Settings(..)
, defSettings
, TypeDefaults
, ValidationError(..)
, TablePopulationMode(..)
, Dependencies
) where

View File

@ -1,6 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DataKinds #-}
@ -17,20 +18,30 @@ import Data.Text (Text)
showColNames :: [Text] -> String
showColNames cols = Text.unpack $ "(" <> Text.intercalate ", " cols <> ")"
-- | Name of a 'Column'
type ColumnName = Text
-- | Type of a 'Column'
type ColumnType = Text
-- | Name of a 'Table'
type TableName = Text
data Nullable = Null | NotNull deriving (Eq, Enum)
-- | Nullness of a 'Column'
data Nullable =
Null -- ^ the column is nullable
| NotNull -- ^ the column is not nullable
deriving (Eq, Enum)
instance Show Nullable where
show Null = "NULL"
show NotNull = "NOT NULL"
-- | A column of a 'Table'
data Column = Column
{ columnName :: !ColumnName
, columnType :: !ColumnType
, columnNullable :: !Nullable
{ columnName :: !ColumnName -- ^ Name of the column
, columnType :: !ColumnType -- ^ Type of the column
, columnNullable :: !Nullable -- ^ Nullness of the column
} deriving (Eq)
instance Show Column where
@ -39,9 +50,24 @@ instance Show Column where
++ Text.unpack columnType ++ " "
++ show columnNullable
data TableConstraint = PrimaryKey !ColumnName
| UniqueKey ![ColumnName]
| ForeignKey !TableName ![(ColumnName, ColumnName)]
-- | A constraint on a 'Table'
data TableConstraint =
-- | A primary key constraint
PrimaryKey
{ tableConstrPrimaryKeyColumn :: !ColumnName -- ^ Name of the primary key column
}
-- | A unique key contraint
| UniqueKey
{ tableConstrUniqueKeyColumns :: ![ColumnName] -- ^ Name of the unique key columns
}
-- | A foreign key constraint
| ForeignKey
{ -- | Name of the table referenced by the foreign key
tableConstrForeignKeyTable :: !TableName
-- | Mapping of the columns as an associative list for the foreign key.
-- keys: this table's column names, values: referenced table's column names.
, tableConstrForeignKeyColumnMapping :: ![(ColumnName, ColumnName)]
}
deriving (Eq)
instance Show TableConstraint where
@ -49,60 +75,245 @@ instance Show TableConstraint where
show (UniqueKey cols) = "UniqueKey " ++ showColNames cols
show (ForeignKey tName colMap) = "ForeignKey " ++ showColNames (map fst colMap) ++ " "
++ Text.unpack tName ++ " " ++ showColNames (map snd colMap)
-- | A table representing a physical table in the database
--
-- The following example represents a set of tables from a multi-publisher blog system:
--
-- >>> :set -XOverloadedStrings
-- >>> :{
-- let publishersTable =
-- Table { tableName = "publishers"
-- , tableColumns =
-- [ Column "id" "integer" NotNull
-- , Column "name" "varchar(100)" NotNull
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , UniqueKey [ "name" ]
-- ]
-- }
-- usersTable =
-- Table { tableName = "users"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp" NotNull
-- , Column "pub_id" "integer" NotNull
-- , Column "username" "varchar(100)" NotNull
-- , Column "email" "varchar(500)" Null
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , ForeignKey "publishers" [ ("pub_id", "id") ]
-- , UniqueKey [ "pub_id", "username" ]
-- ]
-- }
-- -- This table records the time spent by each user on a post
-- postViewEventsTable =
-- Table { tableName = "post_view_events"
-- , tableColumns =
-- [ Column "id" "uuid" NotNull
-- , Column "created_at" "timestamp" NotNull
-- , Column "user_id" "uuid" NotNull
-- , Column "pub_id" "integer" NotNull
-- , Column "post_id" "uuid" NotNull
-- , Column "geo_city" "varchar(100)" Null
-- , Column "geo_country" "varchar(100)" Null
-- , Column "device_version" "varchar(25)" Null
-- , Column "device_name" "varchar(100)" Null
-- , Column "device_type" "varchar(50)" Null
-- , Column "time_spent" "integer" NotNull
-- ]
-- , tableConstraints =
-- [ PrimaryKey "id"
-- , ForeignKey "users" [ ("user_id", "id") ]
-- , ForeignKey "publishers" [ ("pub_id", "id") ]
-- ]
-- }
-- :}
data Table = Table
{ tableName :: !TableName
, tableColumns :: ![Column]
, tableConstraints :: ![TableConstraint]
{ tableName :: !TableName -- ^ Name of the table
, tableColumns :: ![Column] -- ^ A list of the columns in the table
, tableConstraints :: ![TableConstraint] -- ^ A list of the constraints on the table
} deriving (Eq)
instance Show Table where
show Table {..} =
unlines $ ("Table " ++ Text.unpack tableName) : map show tableColumns ++ map show tableConstraints
data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show, Read)
timeUnitName :: TimeUnit -> Text
timeUnitName = Text.toLower . Text.pack . show
timeUnitToSeconds :: TimeUnit -> Int
timeUnitToSeconds Second = 1
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
data Fact = Fact
{ factName :: !TableName
, factTableName :: !TableName
, factTablePersistent :: !Bool
, factParentNames :: ![TableName]
, factColumns :: ![FactColumn]
} deriving (Show)
data FactColumnKind = FCKNone | FCKTargetTable | FCKMaybeSourceColumn | FCKSourceColumn
-- | Type of 'FactColumnType'.
data FactColumnKind =
FCKNone -- ^ Type of a FactColumnType without any parameters
| FCKTargetTable -- ^ Type of a FactColumnType with 'factColTargetTable' as the only parameter.
| FCKMaybeSourceColumn -- ^ Type of a FactColumnType with 'factColMaybeSourceColumn' as the only parameter.
| FCKSourceColumn -- ^ Type of a FactColumnType with 'factColSourceColumn' as the only parameter.
#if MIN_VERSION_base(4,9,0)
-- | Type of a fact column.
#else
-- | Type of a fact column.
--
-- 'DimTime':
-- A fact column which contains a time dimension data (e.g. `created_at`). This is not exatracted
-- as a dimension table and instead just converted to an int which depends on 'settingTimeUnit'.
-- Every fact must have one of this.
--
-- 'NoDimId':
-- A fact column which contains an id of a dimension which does not need to extracted as a table
-- and does not exist already.
--
-- 'TenantId':
-- A fact column which constains an id of a tenant in a multi-tenant database (e.g. `organization_id`).
-- This is not extracted as a dimension table.
--
-- 'DimId':
-- A fact column which constains an id of a dimension which does not need to extracted as a table
-- and exists already.
--
-- 'DimVal':
-- A fact column which constains a value which needs to be extracted to a dimension table.
-- Multiple DimVal fact columns can be extracted to the same dimension table.
--
-- 'FactCount':
-- A fact column which will contain the count of the rows (@count(*)@) or count of a source column
-- if provided.
--
-- 'FactCountDistinct':
-- A fact column which will contain the count of the unique values of a source column if provided
-- or the primary key of the table.
--
-- 'FactSum':
-- A fact column which will contain the sum of the values of the provided source column.
--
-- 'FactAverage':
-- A fact column which will contain the average of the values of the provided source column.
--
-- 'FactMax':
-- A fact column which will contain the maximum of the values of the provided source column.
--
-- 'FactMin':
-- A fact column which will contain the minimum of the values of the provided source column.
#endif
data FactColumnType (a :: FactColumnKind) where
#if MIN_VERSION_base(4,9,0)
-- | A fact column which contains a time dimension data (e.g. `created_at`). This is not exatracted
-- as a dimension table and instead just converted to an int which depends on 'settingTimeUnit'.
-- Every fact must have one of this.
#endif
DimTime :: FactColumnType 'FCKNone
#if MIN_VERSION_base(4,9,0)
-- | A fact column which contains an id of a dimension which does not need to extracted as a table
-- and does not exist already.
#endif
NoDimId :: FactColumnType 'FCKNone
#if MIN_VERSION_base(4,9,0)
-- | A fact column which constains an id of a tenant in a multi-tenant database (e.g. `organization_id`).
-- This is not extracted as a dimension table.
#endif
TenantId :: FactColumnType 'FCKNone
DimId :: { factColTargetTable :: !TableName } -> FactColumnType 'FCKTargetTable
#if MIN_VERSION_base(4,9,0)
-- | A fact column which constains an id of a dimension which does not need to extracted as a table
-- and exists already.
#endif
DimId :: { factColTargetTable :: !TableName -- ^ Name of the target dimension table
} -> FactColumnType 'FCKTargetTable
#if MIN_VERSION_base(4,9,0)
-- | A fact column which constains a value which needs to be extracted to a dimension table.
-- Multiple DimVal fact columns can be extracted to the same dimension table.
#endif
DimVal :: { factColTargetTable :: !TableName } -> FactColumnType 'FCKTargetTable
FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType 'FCKMaybeSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the count of the rows (@count(*)@) or count of a source column
-- if provided.
#endif
FactCount :: { factColMaybeSourceColumn :: !(Maybe ColumnName) -- ^ Name of the optional source column
} -> FactColumnType 'FCKMaybeSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the count of the unique values of a source column if provided
-- or the primary key of the table.
#endif
FactCountDistinct :: { factColMaybeSourceColumn :: !(Maybe ColumnName) } -> FactColumnType 'FCKMaybeSourceColumn
FactSum :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the sum of the values of the provided source column.
#endif
FactSum :: { factColSourceColumn :: !ColumnName -- ^ Name of the source column
} -> FactColumnType 'FCKSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the average of the values of the provided source column.
#endif
FactAverage :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the maximum of the values of the provided source column.
#endif
FactMax :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
#if MIN_VERSION_base(4,9,0)
-- | A fact column which will contain the minimum of the values of the provided source column.
#endif
FactMin :: { factColSourceColumn :: !ColumnName } -> FactColumnType 'FCKSourceColumn
deriving instance Show (FactColumnType a)
-- | A column in a fact table
data FactColumn = forall a. FactColumn
{ factColTargetColumn :: !ColumnName
{ -- | Name of the fact column in the generated table
factColTargetColumn :: !ColumnName
-- | Type of the fact column
, factColType :: FactColumnType a }
deriving instance Show FactColumn
-- | A fact is a table that records measurements or metrics for a specific event
--
-- The following represent a set of facts for the same multi-publisher blog system:
--
-- >>> :{
-- let postFact =
-- Fact { factName = "post_views"
-- , factTableName = "post_view_events"
-- , factTablePersistent = True
-- , factParentNames = []
-- , factColumns =
-- [ FactColumn "created_at" $ DimTime
-- , FactColumn "publisher_id" $ TenantId
-- , FactColumn "user_id" $ DimId "users"
-- , FactColumn "post_id" $ NoDimId
-- , FactColumn "geo_city" $ DimVal "geo"
-- , FactColumn "geo_country" $ DimVal "geo"
-- , FactColumn "device_name" $ DimVal "device"
-- , FactColumn "device_type" $ DimVal "device"
-- , FactColumn "count" $ FactCount Nothing
-- , FactColumn "unq_device_count" $ FactCountDistinct $ Just "device_name"
-- , FactColumn "time_spent" $ FactSum "time_spent"
-- , FactColumn "max_time_spent" $ FactMax "time_spent"
-- ]
-- }
-- :}
data Fact = Fact
{ -- | Name of the fact
factName :: !TableName
-- | Name of the table from which the fact is derived
, factTableName :: !TableName
-- | If true, the generated fact table is actually created; if false, the generated
-- fact table is just used for intermidiate computations and is not actually created
, factTablePersistent :: !Bool
-- | Names of the parent facts
, factParentNames :: ![TableName]
-- | A list of fact columns in the fact
, factColumns :: ![FactColumn]
} deriving (Show)
-- | Returns the name of the source column of a fact column
factSourceColumnName :: FactColumn -> Maybe ColumnName
factSourceColumnName FactColumn {..} = case factColType of
DimTime -> Just factColTargetColumn
@ -117,24 +328,58 @@ factSourceColumnName FactColumn {..} = case factColType of
FactMax {..} -> Just factColSourceColumn
FactMin {..} -> Just factColSourceColumn
-- | Units of time
data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show, Read)
-- | Returns the name of a time unit
timeUnitName :: TimeUnit -> Text
timeUnitName = Text.toLower . Text.pack . show
-- | Returns the number of seconds in a time unit
timeUnitToSeconds :: TimeUnit -> Int
timeUnitToSeconds Second = 1
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
-- | Global settings for the library
data Settings = Settings
{ settingDimPrefix :: !Text
{ -- | Prefix for the names of the generated dimension tables
settingDimPrefix :: !Text
-- | Prefix for the names of the generated fact tables
, settingFactPrefix :: !Text
, settingTimeUnit :: !TimeUnit
, settingAvgCountColumnSuffix :: !Text
, settingAvgSumColumnSuffix :: !Text
, settingDimTableIdColumnName :: !Text
, settingDimTableIdColumnType :: !Text
, settingFactCountColumnType :: !Text
, settingFactCountDistinctErrorRate :: !Double
-- | Infix for the names of the generated fact tables
, settingFactInfix :: !Text
-- | Time unit used to summarize the fact table data
, settingTimeUnit :: !TimeUnit
-- | Suffix for the names of the generated average-count fact columns
, settingAvgCountColumnSuffix :: !Text
-- | Suffix for the names of the generated average-sum fact columns
, settingAvgSumColumnSuffix :: !Text
-- | Name of the id columns of the generated dimension tables
, settingDimTableIdColumnName :: !Text
-- | Type of the id columns of the generated dimension tables
, settingDimTableIdColumnType :: !Text
-- | Type of the count fact columns of the generated dimension tables
, settingFactCountColumnType :: !Text
-- | Maximum error rate for the hyperloglog algorithm for computing
-- count distinct fact columns of the generated dimension tables
, settingFactCountDistinctErrorRate :: !Double
-- | Name of the generated JSON file containing the dependency graph
, settingDependenciesJSONFileName :: !Text
-- | Name of the generated JSON file containing the list of name of the generated fact tables
, settingFactsJSONFileName :: !Text
-- | Name of the generated JSON file containing the list of name of the generated dimension tables
, settingDimensionJSONFileName :: !Text
-- | Value to coalesce the missing foreign key id column values to in the generated fact tables
, settingForeignKeyIdCoalesceValue :: !Int
-- | Suffix template for names of all the generated tables
, settingTableNameSuffixTemplate :: !Text
} deriving (Eq, Show)
-- | Settings with default values
defSettings :: Settings
defSettings = Settings
{ settingDimPrefix = "dim_"
@ -154,20 +399,35 @@ defSettings = Settings
, settingTableNameSuffixTemplate = "{{suff}}"
}
data ValidationError = MissingTable !TableName
| DuplicateTable !TableName
-- | Errors possible while validating the environment
data ValidationError =
-- | When referencing a table which is missing from the env
MissingTable !TableName
-- | When referencing a fact which is missing from the env
| MissingFact !TableName
| DuplicateFact !TableName
-- | When referencing a column which is missing from the env
| MissingColumn !TableName !ColumnName
| DuplicateColumn !TableName !ColumnName
-- | When referencing a fact which is missing from the env
| MissingTimeColumn !TableName
| DuplicateDimension !TableName
-- | When a 'DimTime' fact column of a fact is 'Null'
| MissingNotNullConstraint !TableName !ColumnName
-- | When the default value of a type is missing from the env
| MissingTypeDefault !Text
-- | When there are multiple tables with the same name in the env
| DuplicateTable !TableName
-- | When there are multiple facts with the same name in the env
| DuplicateFact !TableName
-- | When there are multiple columns with the same name in a table in the env
| DuplicateColumn !TableName !ColumnName
-- | When there are multiple dimensions with the same name in the env
| DuplicateDimension !TableName
deriving (Eq, Show)
-- | A mapping of SQL types to their default values used to coleasce null column values in
-- the generated dimension and fact tables
type TypeDefaults = Map Text Text
-- | The environment for the library to compute in
data Env = Env
{ _envTables :: ![Table]
, _envFacts :: ![Fact]
@ -175,18 +435,26 @@ data Env = Env
, _envTypeDefaults :: !TypeDefaults
} deriving (Show)
-- | Return the list of source tables from the env
envTables :: Env -> [Table]
envTables = _envTables
-- | Return the list of facts to be generated from the env
envFacts :: Env -> [Fact]
envFacts = _envFacts
-- | Return the settings from the env
envSettings :: Env -> Settings
envSettings = _envSettings
-- | Return the defaults for the SQL types from the env
envTypeDefaults :: Env -> TypeDefaults
envTypeDefaults = _envTypeDefaults
data TablePopulationMode = FullPopulation | IncrementalPopulation deriving (Eq, Show)
-- | The mode for population of the generated tables; used to switch the SQL for table population
data TablePopulationMode = FullPopulation -- ^ Populating the tables fully, starting with empty ones
| IncrementalPopulation -- ^ Populating the tables incrementally
deriving (Eq, Show)
-- | The dependency graph of the generated tables in the order to be populated
type Dependencies = Map TableName [TableName]

View File

@ -63,7 +63,6 @@ validateFact Fact {..} = do
, let col = findColumn cName (tableColumns table)
, isJust col
, columnNullable (fromJust col) == Null ]
typeDefaultVs =
[ MissingTypeDefault cType
| cName <- [ c | FactColumn c DimVal {..} <- factColumns ]

View File

@ -1,2 +1,4 @@
import Test.DocTest
main = doctest ["-isrc", "Ringo"]
main = do
doctest ["-isrc", "Ringo"]
doctest ["-isrc", "Ringo.Types.Internal"]