Adds fact table extraction function
This commit is contained in:
parent
9107b7c83d
commit
8e74c20705
120
src/Ringo.hs
120
src/Ringo.hs
@ -3,16 +3,25 @@ module Ringo where
|
||||
import Ringo.Types
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe (mapMaybe, fromMaybe)
|
||||
import Data.List (nub)
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.List (nub, find)
|
||||
|
||||
data ValidationError = MissingTable TableName
|
||||
| MissingFact TableName
|
||||
| MissingColumn TableName ColumnName
|
||||
deriving (Eq, Show)
|
||||
|
||||
indexList :: Ord k => (a -> k) -> [a] -> Map.Map k a
|
||||
indexList f = Map.fromList . map (\x -> (f x, x))
|
||||
findTable :: TableName -> [Table] -> Maybe Table
|
||||
findTable tName = find ((== tName) . tableName)
|
||||
|
||||
findFact :: TableName -> [Fact] -> Maybe Fact
|
||||
findFact fName = find ((== fName) . factName)
|
||||
|
||||
findColumn :: ColumnName -> [Column] -> Maybe Column
|
||||
findColumn cName = find ((== cName) . columnName)
|
||||
|
||||
checkTableForCol :: Table -> ColumnName -> [ValidationError]
|
||||
checkTableForCol tab colName =
|
||||
@ -22,12 +31,10 @@ checkTableForCol tab colName =
|
||||
validateTable :: [Table] -> Table -> [ValidationError]
|
||||
validateTable tables table = concatMap checkConstraint . tableConstraints $ table
|
||||
where
|
||||
tableMap = indexList tableName tables
|
||||
|
||||
checkConstraint (PrimaryKey colName) = checkTableForCol table colName
|
||||
checkConstraint (UniqueKey columnNames) = checkTableForColRefs table columnNames
|
||||
checkConstraint (ForeignKey oTableName columnNames) =
|
||||
case Map.lookup oTableName tableMap of
|
||||
case findTable oTableName tables of
|
||||
Just oTable ->
|
||||
checkTableForColRefs table (map fst columnNames)
|
||||
++ checkTableForColRefs oTable (map snd columnNames)
|
||||
@ -35,35 +42,104 @@ validateTable tables table = concatMap checkConstraint . tableConstraints $ tabl
|
||||
|
||||
checkTableForColRefs tab = concatMap (checkTableForCol tab)
|
||||
|
||||
validateFact :: [Table] -> Fact -> [ValidationError]
|
||||
validateFact tables Fact {..} =
|
||||
case Map.lookup factTableName tableMap of
|
||||
validateFact :: [Table] -> [Fact] -> Fact -> [ValidationError]
|
||||
validateFact tables facts Fact {..} =
|
||||
case findTable factTableName tables of
|
||||
Nothing -> [MissingTable factTableName]
|
||||
Just table -> concatMap (checkColumn table) factColumns
|
||||
Just table -> validateTable tables table
|
||||
++ concatMap checkFactParents factParentNames
|
||||
++ concatMap (checkColumn table) factColumns
|
||||
where
|
||||
tableMap = indexList tableName tables
|
||||
checkFactParents fName = case findFact fName facts of
|
||||
Nothing -> [MissingFact fName]
|
||||
Just pFact -> validateFact tables facts pFact
|
||||
checkColumn table = maybe [] (checkTableForCol table) . factColumnName
|
||||
|
||||
checkColumn table = checkTableForCol table . factColumnName
|
||||
withFactValidation :: [Table] -> [Fact] -> Fact -> (Table -> a) -> Either [ValidationError] a
|
||||
withFactValidation tables facts fact func =
|
||||
let errors = validateFact tables facts fact
|
||||
in if not $ null errors
|
||||
then Left errors
|
||||
else Right . func . fromJust $ findTable (factTableName fact) tables
|
||||
|
||||
extractDimensions :: T.Text -> Table -> Fact -> [Table]
|
||||
extractDimensions prefix Table {..} Fact {..} =
|
||||
map (\(dim, cols) -> Table { tableName = T.concat [prefix, dim]
|
||||
, tableColumns = Column "id" "serial" NotNullable : cols
|
||||
extractDimensions' :: [Table] -> T.Text -> Fact -> Table -> [Table]
|
||||
extractDimensions' tables prefix fact Table {..} = dimsFromIds ++ dimsFromVals
|
||||
where
|
||||
dimsFromIds =
|
||||
mapMaybe (\fcol -> case fcol of
|
||||
DimId d _ -> findTable d tables
|
||||
_ -> Nothing)
|
||||
. factColumns
|
||||
$ fact
|
||||
|
||||
dimsFromVals =
|
||||
map (\(dim, cols) -> Table { tableName = prefix <> dim
|
||||
, tableColumns = Column "id" "serial" NotNull : cols
|
||||
, tableConstraints = [ PrimaryKey "id"
|
||||
, UniqueKey (map columnName cols)
|
||||
]
|
||||
})
|
||||
. Map.toList
|
||||
. Map.mapWithKey (\dim -> map (cleanColumn dim) . nub)
|
||||
. Map.fromListWith (++)
|
||||
. Map.fromListWith (flip (++))
|
||||
. mapMaybe (\fcol -> do
|
||||
DimVal d col <- fcol
|
||||
column <- Map.lookup col columnMap
|
||||
column <- findColumn col tableColumns
|
||||
return (d, [column]))
|
||||
. map Just
|
||||
$ factColumns
|
||||
where
|
||||
columnMap = indexList columnName tableColumns
|
||||
. factColumns
|
||||
$ fact
|
||||
|
||||
cleanColumn dim col@Column {..} =
|
||||
col { columnName = fromMaybe columnName . T.stripPrefix (T.snoc dim '_') $ columnName }
|
||||
|
||||
extractDimensions :: [Table] -> [Fact] -> T.Text -> Fact -> Either [ValidationError] [Table]
|
||||
extractDimensions tables facts prefix fact =
|
||||
withFactValidation tables facts fact $ extractDimensions' tables prefix fact
|
||||
|
||||
extractAllDimensions' :: [Table] -> [Fact] -> T.Text -> Fact -> Table -> [Table]
|
||||
extractAllDimensions' tables facts dimPrefix fact table = nub go
|
||||
where
|
||||
go = extractDimensions' tables dimPrefix fact table ++
|
||||
if null $ factParentNames fact
|
||||
then []
|
||||
else concatMap extract . factParentNames $ fact
|
||||
|
||||
extract fName =
|
||||
let pFact = fromJust . findFact fName $ facts
|
||||
pFactTable = fromJust . findTable (factTableName pFact) $ tables
|
||||
in extractAllDimensions' tables facts dimPrefix pFact pFactTable
|
||||
|
||||
extractFactTable :: [Table] -> [Fact] -> Settings -> Fact -> Either [ValidationError] Table
|
||||
extractFactTable tables facts Settings {..} fact =
|
||||
withFactValidation tables facts fact $ \table@Table{..} ->
|
||||
let allDims = extractAllDimensions' tables facts settingDimPrefix fact table
|
||||
sourceColumnType colName = columnType . fromJust $ findColumn colName tableColumns
|
||||
columns = flip concatMap (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> [ Column (timeUnitColName cName) "integer" NotNull ]
|
||||
NoDimId cName -> [ fromJust . findColumn cName $ tableColumns ]
|
||||
FactCount cName -> [ Column cName "integer" NotNull ]
|
||||
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
|
||||
FactAverage scName cName -> [ Column (cName <> "_count") "integer" NotNull
|
||||
, Column (cName <> "_sum") (sourceColumnType scName) NotNull
|
||||
, Column cName "double" NotNull
|
||||
]
|
||||
FactCountDistinct cName -> [ Column cName ("integer[]") NotNull ]
|
||||
_ -> []
|
||||
fks = flip map allDims $ \Table { tableName = tName, tableColumns = tCols } ->
|
||||
let colName = fromMaybe tName (T.stripPrefix settingDimPrefix tName) <> "_id"
|
||||
colNullable = if any ((== Null) . columnNullable) tCols then Null else NotNull
|
||||
in (Column colName "integer" colNullable, ForeignKey tName [(colName, "id")])
|
||||
ukColNames = mapMaybe (\col -> case col of
|
||||
DimTime cName -> Just (timeUnitColName cName)
|
||||
NoDimId cName -> Just cName
|
||||
_ -> Nothing) (factColumns fact)
|
||||
++ map (columnName . fst) fks
|
||||
in Table { tableName = settingFactPrefix <> factName fact
|
||||
, tableColumns = columns ++ map fst fks
|
||||
, tableConstraints = UniqueKey ukColNames : map snd fks
|
||||
}
|
||||
where
|
||||
timeUnitColName colName = colName <> "_" <> timeUnitName settingTimeUnit <> "_id"
|
||||
|
||||
|
||||
|
@ -1,50 +1,75 @@
|
||||
module Ringo.Types where
|
||||
|
||||
import Data.Text (Text)
|
||||
|
||||
(&) :: a -> (a -> b) -> b
|
||||
x & f = f x
|
||||
import qualified Data.Text as T
|
||||
|
||||
type ColumnName = Text
|
||||
type ColumnType = Text
|
||||
type TableName = Text
|
||||
|
||||
data Nullable = Nullable | NotNullable deriving (Eq, Enum, Show)
|
||||
data Nullable = Null | NotNull deriving (Eq, Enum, Show)
|
||||
|
||||
data Column = Column
|
||||
{ columnName :: ColumnName
|
||||
, columnType :: ColumnType
|
||||
, columnNullable :: Nullable
|
||||
{ columnName :: !ColumnName
|
||||
, columnType :: !ColumnType
|
||||
, columnNullable :: !Nullable
|
||||
} deriving (Eq, Show)
|
||||
|
||||
data TableContraint = PrimaryKey ColumnName
|
||||
| UniqueKey [ColumnName]
|
||||
| ForeignKey TableName [(ColumnName, ColumnName)]
|
||||
data TableContraint = PrimaryKey !ColumnName
|
||||
| UniqueKey ![ColumnName]
|
||||
| ForeignKey !TableName ![(ColumnName, ColumnName)]
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Table = Table
|
||||
{ tableName :: TableName
|
||||
, tableColumns :: [Column]
|
||||
, tableConstraints :: [TableContraint]
|
||||
{ tableName :: !TableName
|
||||
, tableColumns :: ![Column]
|
||||
, tableConstraints :: ![TableContraint]
|
||||
} deriving (Eq, Show)
|
||||
|
||||
data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year
|
||||
deriving (Eq, Enum, Show)
|
||||
|
||||
timeUnitName :: TimeUnit -> Text
|
||||
timeUnitName = T.toLower . T.pack . show
|
||||
|
||||
data Fact = Fact
|
||||
{ factName :: TableName
|
||||
, factTableName :: TableName
|
||||
, factColumns :: [FactColumn]
|
||||
{ factName :: !TableName
|
||||
, factTableName :: !TableName
|
||||
, factParentNames :: ![TableName]
|
||||
, factColumns :: ![FactColumn]
|
||||
} deriving (Eq, Show)
|
||||
|
||||
data FactColumn = DimTime ColumnName
|
||||
| NoDimId ColumnName
|
||||
| DimId TableName ColumnName
|
||||
| DimVal TableName ColumnName
|
||||
data FactValType = Count | Sum | Average | CountDistinct deriving (Eq, Enum, Show)
|
||||
|
||||
data FactColumn = DimTime !ColumnName
|
||||
| NoDimId !ColumnName
|
||||
| DimId !TableName !ColumnName
|
||||
| DimVal !TableName !ColumnName
|
||||
| FactCount !ColumnName
|
||||
| FactSum !ColumnName !ColumnName
|
||||
| FactAverage !ColumnName !ColumnName
|
||||
| FactCountDistinct !ColumnName
|
||||
deriving (Eq, Show)
|
||||
|
||||
factColumnName :: FactColumn -> ColumnName
|
||||
factColumnName (DimTime cName) = cName
|
||||
factColumnName (NoDimId cName) = cName
|
||||
factColumnName (DimId _ cName) = cName
|
||||
factColumnName (DimVal _ cName) = cName
|
||||
factColumnName :: FactColumn -> Maybe ColumnName
|
||||
factColumnName (DimTime cName) = Just cName
|
||||
factColumnName (NoDimId cName) = Just cName
|
||||
factColumnName (DimId _ cName) = Just cName
|
||||
factColumnName (DimVal _ cName) = Just cName
|
||||
factColumnName (FactCount _) = Nothing
|
||||
factColumnName (FactSum cName _) = Just cName
|
||||
factColumnName (FactAverage cName _) = Just cName
|
||||
factColumnName (FactCountDistinct _) = Nothing
|
||||
|
||||
data Settings = Settings
|
||||
{ settingDimPrefix :: !Text
|
||||
, settingFactPrefix :: !Text
|
||||
, settingTimeUnit :: !TimeUnit
|
||||
} deriving (Eq, Show)
|
||||
|
||||
defSettings :: Settings
|
||||
defSettings = Settings
|
||||
{ settingDimPrefix = "dim_"
|
||||
, settingFactPrefix = "fact_"
|
||||
, settingTimeUnit = Minute
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user