From 8e74c207056f831f3de0acba6bfd1f81dffb30fa Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Mon, 14 Dec 2015 21:57:11 +0530 Subject: [PATCH] Adds fact table extraction function --- src/Ringo.hs | 142 ++++++++++++++++++++++++++++++++++----------- src/Ringo/Types.hs | 75 ++++++++++++++++-------- 2 files changed, 159 insertions(+), 58 deletions(-) diff --git a/src/Ringo.hs b/src/Ringo.hs index 0894696..957acca 100644 --- a/src/Ringo.hs +++ b/src/Ringo.hs @@ -3,31 +3,38 @@ module Ringo where import Ringo.Types import qualified Data.Map as Map -import Data.Maybe (mapMaybe, fromMaybe) -import Data.List (nub) import qualified Data.Text as T +import Data.Maybe (mapMaybe, fromMaybe, fromJust) +import Data.Monoid ((<>)) +import Data.List (nub, find) + data ValidationError = MissingTable TableName + | MissingFact TableName | MissingColumn TableName ColumnName deriving (Eq, Show) -indexList :: Ord k => (a -> k) -> [a] -> Map.Map k a -indexList f = Map.fromList . map (\x -> (f x, x)) +findTable :: TableName -> [Table] -> Maybe Table +findTable tName = find ((== tName) . tableName) + +findFact :: TableName -> [Fact] -> Maybe Fact +findFact fName = find ((== fName) . factName) + +findColumn :: ColumnName -> [Column] -> Maybe Column +findColumn cName = find ((== cName) . columnName) checkTableForCol :: Table -> ColumnName -> [ValidationError] checkTableForCol tab colName = - [MissingColumn (tableName tab) colName | - not . any ((colName ==) . columnName) . tableColumns $ tab] + [ MissingColumn (tableName tab) colName | + not . any ((colName ==) . columnName) . tableColumns $ tab ] validateTable :: [Table] -> Table -> [ValidationError] validateTable tables table = concatMap checkConstraint . tableConstraints $ table where - tableMap = indexList tableName tables - checkConstraint (PrimaryKey colName) = checkTableForCol table colName checkConstraint (UniqueKey columnNames) = checkTableForColRefs table columnNames checkConstraint (ForeignKey oTableName columnNames) = - case Map.lookup oTableName tableMap of + case findTable oTableName tables of Just oTable -> checkTableForColRefs table (map fst columnNames) ++ checkTableForColRefs oTable (map snd columnNames) @@ -35,35 +42,104 @@ validateTable tables table = concatMap checkConstraint . tableConstraints $ tabl checkTableForColRefs tab = concatMap (checkTableForCol tab) -validateFact :: [Table] -> Fact -> [ValidationError] -validateFact tables Fact {..} = - case Map.lookup factTableName tableMap of +validateFact :: [Table] -> [Fact] -> Fact -> [ValidationError] +validateFact tables facts Fact {..} = + case findTable factTableName tables of Nothing -> [MissingTable factTableName] - Just table -> concatMap (checkColumn table) factColumns + Just table -> validateTable tables table + ++ concatMap checkFactParents factParentNames + ++ concatMap (checkColumn table) factColumns where - tableMap = indexList tableName tables + checkFactParents fName = case findFact fName facts of + Nothing -> [MissingFact fName] + Just pFact -> validateFact tables facts pFact + checkColumn table = maybe [] (checkTableForCol table) . factColumnName - checkColumn table = checkTableForCol table . factColumnName +withFactValidation :: [Table] -> [Fact] -> Fact -> (Table -> a) -> Either [ValidationError] a +withFactValidation tables facts fact func = + let errors = validateFact tables facts fact + in if not $ null errors + then Left errors + else Right . func . fromJust $ findTable (factTableName fact) tables -extractDimensions :: T.Text -> Table -> Fact -> [Table] -extractDimensions prefix Table {..} Fact {..} = - map (\(dim, cols) -> Table { tableName = T.concat [prefix, dim] - , tableColumns = Column "id" "serial" NotNullable : cols - , tableConstraints = [ PrimaryKey "id" - , UniqueKey (map columnName cols) - ] - }) - . Map.toList - . Map.mapWithKey (\dim -> map (cleanColumn dim) . nub) - . Map.fromListWith (++) - . mapMaybe (\fcol -> do - DimVal d col <- fcol - column <- Map.lookup col columnMap - return (d, [column])) - . map Just - $ factColumns +extractDimensions' :: [Table] -> T.Text -> Fact -> Table -> [Table] +extractDimensions' tables prefix fact Table {..} = dimsFromIds ++ dimsFromVals where - columnMap = indexList columnName tableColumns + dimsFromIds = + mapMaybe (\fcol -> case fcol of + DimId d _ -> findTable d tables + _ -> Nothing) + . factColumns + $ fact + + dimsFromVals = + map (\(dim, cols) -> Table { tableName = prefix <> dim + , tableColumns = Column "id" "serial" NotNull : cols + , tableConstraints = [ PrimaryKey "id" + , UniqueKey (map columnName cols) + ] + }) + . Map.toList + . Map.mapWithKey (\dim -> map (cleanColumn dim) . nub) + . Map.fromListWith (flip (++)) + . mapMaybe (\fcol -> do + DimVal d col <- fcol + column <- findColumn col tableColumns + return (d, [column])) + . map Just + . factColumns + $ fact cleanColumn dim col@Column {..} = col { columnName = fromMaybe columnName . T.stripPrefix (T.snoc dim '_') $ columnName } + +extractDimensions :: [Table] -> [Fact] -> T.Text -> Fact -> Either [ValidationError] [Table] +extractDimensions tables facts prefix fact = + withFactValidation tables facts fact $ extractDimensions' tables prefix fact + +extractAllDimensions' :: [Table] -> [Fact] -> T.Text -> Fact -> Table -> [Table] +extractAllDimensions' tables facts dimPrefix fact table = nub go + where + go = extractDimensions' tables dimPrefix fact table ++ + if null $ factParentNames fact + then [] + else concatMap extract . factParentNames $ fact + + extract fName = + let pFact = fromJust . findFact fName $ facts + pFactTable = fromJust . findTable (factTableName pFact) $ tables + in extractAllDimensions' tables facts dimPrefix pFact pFactTable + +extractFactTable :: [Table] -> [Fact] -> Settings -> Fact -> Either [ValidationError] Table +extractFactTable tables facts Settings {..} fact = + withFactValidation tables facts fact $ \table@Table{..} -> + let allDims = extractAllDimensions' tables facts settingDimPrefix fact table + sourceColumnType colName = columnType . fromJust $ findColumn colName tableColumns + columns = flip concatMap (factColumns fact) $ \col -> case col of + DimTime cName -> [ Column (timeUnitColName cName) "integer" NotNull ] + NoDimId cName -> [ fromJust . findColumn cName $ tableColumns ] + FactCount cName -> [ Column cName "integer" NotNull ] + FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] + FactAverage scName cName -> [ Column (cName <> "_count") "integer" NotNull + , Column (cName <> "_sum") (sourceColumnType scName) NotNull + , Column cName "double" NotNull + ] + FactCountDistinct cName -> [ Column cName ("integer[]") NotNull ] + _ -> [] + fks = flip map allDims $ \Table { tableName = tName, tableColumns = tCols } -> + let colName = fromMaybe tName (T.stripPrefix settingDimPrefix tName) <> "_id" + colNullable = if any ((== Null) . columnNullable) tCols then Null else NotNull + in (Column colName "integer" colNullable, ForeignKey tName [(colName, "id")]) + ukColNames = mapMaybe (\col -> case col of + DimTime cName -> Just (timeUnitColName cName) + NoDimId cName -> Just cName + _ -> Nothing) (factColumns fact) + ++ map (columnName . fst) fks + in Table { tableName = settingFactPrefix <> factName fact + , tableColumns = columns ++ map fst fks + , tableConstraints = UniqueKey ukColNames : map snd fks + } + where + timeUnitColName colName = colName <> "_" <> timeUnitName settingTimeUnit <> "_id" + + diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index 8bc5933..a2b4858 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -1,50 +1,75 @@ module Ringo.Types where import Data.Text (Text) - -(&) :: a -> (a -> b) -> b -x & f = f x +import qualified Data.Text as T type ColumnName = Text type ColumnType = Text type TableName = Text -data Nullable = Nullable | NotNullable deriving (Eq, Enum, Show) +data Nullable = Null | NotNull deriving (Eq, Enum, Show) data Column = Column - { columnName :: ColumnName - , columnType :: ColumnType - , columnNullable :: Nullable + { columnName :: !ColumnName + , columnType :: !ColumnType + , columnNullable :: !Nullable } deriving (Eq, Show) -data TableContraint = PrimaryKey ColumnName - | UniqueKey [ColumnName] - | ForeignKey TableName [(ColumnName, ColumnName)] +data TableContraint = PrimaryKey !ColumnName + | UniqueKey ![ColumnName] + | ForeignKey !TableName ![(ColumnName, ColumnName)] deriving (Eq, Show) data Table = Table - { tableName :: TableName - , tableColumns :: [Column] - , tableConstraints :: [TableContraint] + { tableName :: !TableName + , tableColumns :: ![Column] + , tableConstraints :: ![TableContraint] } deriving (Eq, Show) data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year deriving (Eq, Enum, Show) +timeUnitName :: TimeUnit -> Text +timeUnitName = T.toLower . T.pack . show + data Fact = Fact - { factName :: TableName - , factTableName :: TableName - , factColumns :: [FactColumn] + { factName :: !TableName + , factTableName :: !TableName + , factParentNames :: ![TableName] + , factColumns :: ![FactColumn] } deriving (Eq, Show) -data FactColumn = DimTime ColumnName - | NoDimId ColumnName - | DimId TableName ColumnName - | DimVal TableName ColumnName +data FactValType = Count | Sum | Average | CountDistinct deriving (Eq, Enum, Show) + +data FactColumn = DimTime !ColumnName + | NoDimId !ColumnName + | DimId !TableName !ColumnName + | DimVal !TableName !ColumnName + | FactCount !ColumnName + | FactSum !ColumnName !ColumnName + | FactAverage !ColumnName !ColumnName + | FactCountDistinct !ColumnName deriving (Eq, Show) -factColumnName :: FactColumn -> ColumnName -factColumnName (DimTime cName) = cName -factColumnName (NoDimId cName) = cName -factColumnName (DimId _ cName) = cName -factColumnName (DimVal _ cName) = cName +factColumnName :: FactColumn -> Maybe ColumnName +factColumnName (DimTime cName) = Just cName +factColumnName (NoDimId cName) = Just cName +factColumnName (DimId _ cName) = Just cName +factColumnName (DimVal _ cName) = Just cName +factColumnName (FactCount _) = Nothing +factColumnName (FactSum cName _) = Just cName +factColumnName (FactAverage cName _) = Just cName +factColumnName (FactCountDistinct _) = Nothing + +data Settings = Settings + { settingDimPrefix :: !Text + , settingFactPrefix :: !Text + , settingTimeUnit :: !TimeUnit + } deriving (Eq, Show) + +defSettings :: Settings +defSettings = Settings + { settingDimPrefix = "dim_" + , settingFactPrefix = "fact_" + , settingTimeUnit = Minute + }