Adds fact table extraction function

pull/1/head
Abhinav Sarkar 2015-12-14 21:57:11 +05:30
parent 9107b7c83d
commit 8e74c20705
2 changed files with 159 additions and 58 deletions

View File

@ -3,31 +3,38 @@ module Ringo where
import Ringo.Types
import qualified Data.Map as Map
import Data.Maybe (mapMaybe, fromMaybe)
import Data.List (nub)
import qualified Data.Text as T
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Monoid ((<>))
import Data.List (nub, find)
data ValidationError = MissingTable TableName
| MissingFact TableName
| MissingColumn TableName ColumnName
deriving (Eq, Show)
indexList :: Ord k => (a -> k) -> [a] -> Map.Map k a
indexList f = Map.fromList . map (\x -> (f x, x))
findTable :: TableName -> [Table] -> Maybe Table
findTable tName = find ((== tName) . tableName)
findFact :: TableName -> [Fact] -> Maybe Fact
findFact fName = find ((== fName) . factName)
findColumn :: ColumnName -> [Column] -> Maybe Column
findColumn cName = find ((== cName) . columnName)
checkTableForCol :: Table -> ColumnName -> [ValidationError]
checkTableForCol tab colName =
[MissingColumn (tableName tab) colName |
not . any ((colName ==) . columnName) . tableColumns $ tab]
[ MissingColumn (tableName tab) colName |
not . any ((colName ==) . columnName) . tableColumns $ tab ]
validateTable :: [Table] -> Table -> [ValidationError]
validateTable tables table = concatMap checkConstraint . tableConstraints $ table
where
tableMap = indexList tableName tables
checkConstraint (PrimaryKey colName) = checkTableForCol table colName
checkConstraint (UniqueKey columnNames) = checkTableForColRefs table columnNames
checkConstraint (ForeignKey oTableName columnNames) =
case Map.lookup oTableName tableMap of
case findTable oTableName tables of
Just oTable ->
checkTableForColRefs table (map fst columnNames)
++ checkTableForColRefs oTable (map snd columnNames)
@ -35,35 +42,104 @@ validateTable tables table = concatMap checkConstraint . tableConstraints $ tabl
checkTableForColRefs tab = concatMap (checkTableForCol tab)
validateFact :: [Table] -> Fact -> [ValidationError]
validateFact tables Fact {..} =
case Map.lookup factTableName tableMap of
validateFact :: [Table] -> [Fact] -> Fact -> [ValidationError]
validateFact tables facts Fact {..} =
case findTable factTableName tables of
Nothing -> [MissingTable factTableName]
Just table -> concatMap (checkColumn table) factColumns
Just table -> validateTable tables table
++ concatMap checkFactParents factParentNames
++ concatMap (checkColumn table) factColumns
where
tableMap = indexList tableName tables
checkFactParents fName = case findFact fName facts of
Nothing -> [MissingFact fName]
Just pFact -> validateFact tables facts pFact
checkColumn table = maybe [] (checkTableForCol table) . factColumnName
checkColumn table = checkTableForCol table . factColumnName
withFactValidation :: [Table] -> [Fact] -> Fact -> (Table -> a) -> Either [ValidationError] a
withFactValidation tables facts fact func =
let errors = validateFact tables facts fact
in if not $ null errors
then Left errors
else Right . func . fromJust $ findTable (factTableName fact) tables
extractDimensions :: T.Text -> Table -> Fact -> [Table]
extractDimensions prefix Table {..} Fact {..} =
map (\(dim, cols) -> Table { tableName = T.concat [prefix, dim]
, tableColumns = Column "id" "serial" NotNullable : cols
extractDimensions' :: [Table] -> T.Text -> Fact -> Table -> [Table]
extractDimensions' tables prefix fact Table {..} = dimsFromIds ++ dimsFromVals
where
dimsFromIds =
mapMaybe (\fcol -> case fcol of
DimId d _ -> findTable d tables
_ -> Nothing)
. factColumns
$ fact
dimsFromVals =
map (\(dim, cols) -> Table { tableName = prefix <> dim
, tableColumns = Column "id" "serial" NotNull : cols
, tableConstraints = [ PrimaryKey "id"
, UniqueKey (map columnName cols)
]
})
. Map.toList
. Map.mapWithKey (\dim -> map (cleanColumn dim) . nub)
. Map.fromListWith (++)
. Map.fromListWith (flip (++))
. mapMaybe (\fcol -> do
DimVal d col <- fcol
column <- Map.lookup col columnMap
column <- findColumn col tableColumns
return (d, [column]))
. map Just
$ factColumns
where
columnMap = indexList columnName tableColumns
. factColumns
$ fact
cleanColumn dim col@Column {..} =
col { columnName = fromMaybe columnName . T.stripPrefix (T.snoc dim '_') $ columnName }
extractDimensions :: [Table] -> [Fact] -> T.Text -> Fact -> Either [ValidationError] [Table]
extractDimensions tables facts prefix fact =
withFactValidation tables facts fact $ extractDimensions' tables prefix fact
extractAllDimensions' :: [Table] -> [Fact] -> T.Text -> Fact -> Table -> [Table]
extractAllDimensions' tables facts dimPrefix fact table = nub go
where
go = extractDimensions' tables dimPrefix fact table ++
if null $ factParentNames fact
then []
else concatMap extract . factParentNames $ fact
extract fName =
let pFact = fromJust . findFact fName $ facts
pFactTable = fromJust . findTable (factTableName pFact) $ tables
in extractAllDimensions' tables facts dimPrefix pFact pFactTable
extractFactTable :: [Table] -> [Fact] -> Settings -> Fact -> Either [ValidationError] Table
extractFactTable tables facts Settings {..} fact =
withFactValidation tables facts fact $ \table@Table{..} ->
let allDims = extractAllDimensions' tables facts settingDimPrefix fact table
sourceColumnType colName = columnType . fromJust $ findColumn colName tableColumns
columns = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ Column (timeUnitColName cName) "integer" NotNull ]
NoDimId cName -> [ fromJust . findColumn cName $ tableColumns ]
FactCount cName -> [ Column cName "integer" NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactAverage scName cName -> [ Column (cName <> "_count") "integer" NotNull
, Column (cName <> "_sum") (sourceColumnType scName) NotNull
, Column cName "double" NotNull
]
FactCountDistinct cName -> [ Column cName ("integer[]") NotNull ]
_ -> []
fks = flip map allDims $ \Table { tableName = tName, tableColumns = tCols } ->
let colName = fromMaybe tName (T.stripPrefix settingDimPrefix tName) <> "_id"
colNullable = if any ((== Null) . columnNullable) tCols then Null else NotNull
in (Column colName "integer" colNullable, ForeignKey tName [(colName, "id")])
ukColNames = mapMaybe (\col -> case col of
DimTime cName -> Just (timeUnitColName cName)
NoDimId cName -> Just cName
_ -> Nothing) (factColumns fact)
++ map (columnName . fst) fks
in Table { tableName = settingFactPrefix <> factName fact
, tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks
}
where
timeUnitColName colName = colName <> "_" <> timeUnitName settingTimeUnit <> "_id"

View File

@ -1,50 +1,75 @@
module Ringo.Types where
import Data.Text (Text)
(&) :: a -> (a -> b) -> b
x & f = f x
import qualified Data.Text as T
type ColumnName = Text
type ColumnType = Text
type TableName = Text
data Nullable = Nullable | NotNullable deriving (Eq, Enum, Show)
data Nullable = Null | NotNull deriving (Eq, Enum, Show)
data Column = Column
{ columnName :: ColumnName
, columnType :: ColumnType
, columnNullable :: Nullable
{ columnName :: !ColumnName
, columnType :: !ColumnType
, columnNullable :: !Nullable
} deriving (Eq, Show)
data TableContraint = PrimaryKey ColumnName
| UniqueKey [ColumnName]
| ForeignKey TableName [(ColumnName, ColumnName)]
data TableContraint = PrimaryKey !ColumnName
| UniqueKey ![ColumnName]
| ForeignKey !TableName ![(ColumnName, ColumnName)]
deriving (Eq, Show)
data Table = Table
{ tableName :: TableName
, tableColumns :: [Column]
, tableConstraints :: [TableContraint]
{ tableName :: !TableName
, tableColumns :: ![Column]
, tableConstraints :: ![TableContraint]
} deriving (Eq, Show)
data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year
deriving (Eq, Enum, Show)
timeUnitName :: TimeUnit -> Text
timeUnitName = T.toLower . T.pack . show
data Fact = Fact
{ factName :: TableName
, factTableName :: TableName
, factColumns :: [FactColumn]
{ factName :: !TableName
, factTableName :: !TableName
, factParentNames :: ![TableName]
, factColumns :: ![FactColumn]
} deriving (Eq, Show)
data FactColumn = DimTime ColumnName
| NoDimId ColumnName
| DimId TableName ColumnName
| DimVal TableName ColumnName
data FactValType = Count | Sum | Average | CountDistinct deriving (Eq, Enum, Show)
data FactColumn = DimTime !ColumnName
| NoDimId !ColumnName
| DimId !TableName !ColumnName
| DimVal !TableName !ColumnName
| FactCount !ColumnName
| FactSum !ColumnName !ColumnName
| FactAverage !ColumnName !ColumnName
| FactCountDistinct !ColumnName
deriving (Eq, Show)
factColumnName :: FactColumn -> ColumnName
factColumnName (DimTime cName) = cName
factColumnName (NoDimId cName) = cName
factColumnName (DimId _ cName) = cName
factColumnName (DimVal _ cName) = cName
factColumnName :: FactColumn -> Maybe ColumnName
factColumnName (DimTime cName) = Just cName
factColumnName (NoDimId cName) = Just cName
factColumnName (DimId _ cName) = Just cName
factColumnName (DimVal _ cName) = Just cName
factColumnName (FactCount _) = Nothing
factColumnName (FactSum cName _) = Just cName
factColumnName (FactAverage cName _) = Just cName
factColumnName (FactCountDistinct _) = Nothing
data Settings = Settings
{ settingDimPrefix :: !Text
, settingFactPrefix :: !Text
, settingTimeUnit :: !TimeUnit
} deriving (Eq, Show)
defSettings :: Settings
defSettings = Settings
{ settingDimPrefix = "dim_"
, settingFactPrefix = "fact_"
, settingTimeUnit = Minute
}