From 900b4b7488a4bae1a702f2eaa18175723e70670f Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Wed, 16 Dec 2015 16:57:10 +0530 Subject: [PATCH] Adds fact table insert SQL generation. --- src/Ringo/Extractor.hs | 22 ++++----- src/Ringo/Extractor/Internal.hs | 31 ++++++++++-- src/Ringo/Generator.hs | 86 ++++++++++++++++++++++++++++++--- src/Ringo/Types.hs | 9 +++- 4 files changed, 123 insertions(+), 25 deletions(-) diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index 691b074..7cc4302 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -4,10 +4,8 @@ module Ringo.Extractor , extractFactTable ) where -import qualified Data.Text as Text - import Control.Monad.Reader (Reader, asks) -import Data.Maybe (mapMaybe, fromMaybe, fromJust) +import Data.Maybe (mapMaybe, fromJust) import Data.Monoid ((<>)) import Ringo.Extractor.Internal @@ -24,31 +22,29 @@ extractFactTable fact = do sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table columns = flip concatMap (factColumns fact) $ \col -> case col of - DimTime cName -> [ Column (timeUnitColName cName settingTimeUnit) intType NotNull ] + DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ] NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table] FactCount cName -> [ Column cName intType NotNull ] FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] - FactAverage scName cName -> [ Column (cName <> "_count") intType NotNull - , Column (cName <> "_sum") (sourceColumnType scName) NotNull + FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull + , Column (averageSumColumnName cName) (sourceColumnType scName) NotNull ] - FactCountDistinct cName -> [ Column (cName <> "_hll") (intType <> "[]") NotNull ] + FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ] _ -> [] - fks = flip map allDims $ \Table { .. } -> - let colName = fromMaybe tableName (Text.stripPrefix settingDimPrefix tableName) <> "_id" + fks = flip map allDims $ \(_, Table {..}) -> + let colName = factDimFKIdColumnName settingDimPrefix tableName colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")]) ukColNames = (++ map (columnName . fst) fks) . flip mapMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just (timeUnitColName cName settingTimeUnit) + DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit) NoDimId cName -> Just cName _ -> Nothing - return Table { tableName = settingFactPrefix <> factName fact + return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit , tableColumns = columns ++ map fst fks , tableConstraints = UniqueKey ukColNames : map snd fks } - where - timeUnitColName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id" diff --git a/src/Ringo/Extractor/Internal.hs b/src/Ringo/Extractor/Internal.hs index 2be549b..27309d7 100644 --- a/src/Ringo/Extractor/Internal.hs +++ b/src/Ringo/Extractor/Internal.hs @@ -4,9 +4,10 @@ import qualified Data.Map as Map import qualified Data.Text as Text import Control.Monad.Reader (Reader, asks) +import Data.Function (on) import Data.Maybe (mapMaybe, fromMaybe, fromJust) import Data.Monoid ((<>)) -import Data.List (nub) +import Data.List (nub, nubBy) import Ringo.Types import Ringo.Utils @@ -15,6 +16,26 @@ dimColumnName :: Text.Text -> ColumnName -> ColumnName dimColumnName dimName columnName = fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName +timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName +timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id" + +averageCountColummName :: ColumnName -> ColumnName +averageCountColummName colName = colName <> "_count" + +averageSumColumnName :: ColumnName -> ColumnName +averageSumColumnName colName = colName <> "_sum" + +countDistinctColumnName :: ColumnName -> ColumnName +countDistinctColumnName colName = colName <> "_hll" + +factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName +factDimFKIdColumnName dimPrefix dimTableName = + fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id" + +extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName +extractedFactTableName factPrefix factName timeUnit = + factPrefix <> factName <> "_by_" <> timeUnitName timeUnit + extractDimensionTables :: Fact -> Reader Env [Table] extractDimensionTables fact = do tables <- asks envTables @@ -47,12 +68,12 @@ extractDimensionTables fact = do . factColumns $ fact -extractAllDimensionTables :: Fact -> Reader Env [Table] +extractAllDimensionTables :: Fact -> Reader Env [(Fact, Table)] extractAllDimensionTables fact = do - myDims <- extractDimensionTables fact + myDims <- map (fact,) <$> extractDimensionTables fact parentDims <- concat <$> mapM extract (factParentNames fact) - return . nub $ myDims ++ parentDims + return . nubBy ((==) `on` snd) $ myDims ++ parentDims where extract fName = do - facts <- asks envFacts + facts <- asks envFacts extractAllDimensionTables . fromJust . findFact fName $ facts diff --git a/src/Ringo/Generator.hs b/src/Ringo/Generator.hs index 1c5f7f3..115f60d 100644 --- a/src/Ringo/Generator.hs +++ b/src/Ringo/Generator.hs @@ -1,18 +1,20 @@ module Ringo.Generator ( tableDefnSQL , dimensionTableInsertSQL + , factTableInsertSQL ) where import qualified Data.Text as Text import Control.Monad.Reader (Reader, asks) -import Data.List (intersperse) -import Data.Maybe (mapMaybe) +import Data.List (intersperse, nub, find) +import Data.Maybe (fromJust, mapMaybe) import Data.Monoid ((<>)) import Data.Text (Text) import Ringo.Extractor.Internal import Ringo.Types +import Ringo.Utils nullableDefnSQL :: Nullable -> Text nullableDefnSQL Null = "NULL" @@ -43,16 +45,88 @@ tableDefnSQL Table {..} = <> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns) <> "\n)" +dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)] +dimColumnMapping dimPrefix fact dimTableName = + flip mapMaybe (factColumns fact) $ \fCol -> case fCol of + DimVal dName cName | dimPrefix <> dName == dimTableName -> + Just (dimColumnName dName cName, cName) + _ -> Nothing + dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text dimensionTableInsertSQL fact dimTableName = do dimPrefix <- settingDimPrefix <$> asks envSettings - let colMapping = flip mapMaybe (factColumns fact) $ \fCol -> case fCol of - DimVal dName cName | dimPrefix <> dName == dimTableName -> - Just (dimColumnName dName cName, cName) - _ -> Nothing + let colMapping = dimColumnMapping dimPrefix fact dimTableName return $ "INSERT INTO " <> dimTableName <> " (\n" <> colNamesString (map fst colMapping) <> "\n) SELECT DISTINCT \n" <> colNamesString (map snd colMapping) <> "\nFROM " <> factTableName fact + +factTableInsertSQL :: Fact -> Reader Env Text +factTableInsertSQL fact= do + let fTableName = factTableName fact + Settings {..} <- asks envSettings + allDims <- extractAllDimensionTables fact + tables <- asks envTables + let table = fromJust . findTable fTableName $ tables + + let timeUnitColumnInsertSQL cName = + let colName = timeUnitColumnName cName settingTimeUnit + in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/" + <> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")") + + factColMap = flip concatMap (factColumns fact) $ \col -> case col of + DimTime cName -> [ timeUnitColumnInsertSQL cName ] + NoDimId cName -> [ (cName, fullColName fTableName cName) ] + FactCount cName -> [ (cName, "count(*)") ] + FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ] + FactAverage scName cName -> [ ( averageCountColummName cName + , "count(" <> fullColName fTableName scName <> ")") + , ( averageSumColumnName cName + , "sum(" <> fullColName fTableName scName <> ")") ] + _ -> [] + + dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) -> + let colName = factDimFKIdColumnName settingDimPrefix tableName + factSourceTableName = factTableName dimFact + insertSQL = + if factTable `elem` tables + then fullColName factSourceTableName colName + else + let dimLookupWhereClauses = + map (\(c1, c2) -> + fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2) + $ dimColumnMapping settingDimPrefix dimFact tableName + in "SELECT id FROM " <> tableName <> "\nWHERE " + <> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses) + in (colName, insertSQL) + + colMap = map (\(cName, sql) -> (cName, asName cName sql)) $ factColMap ++ dimColMap + + joinClauses = + mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> " ON "<> p) <$> joinClausePreds table tName) + . nub + . map (\(dimFact, _) -> factTableName dimFact) + $ allDims + + return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit + <> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)" + <> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap) + <> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses) + <> "\nGROUP BY \n" <> Text.concat (intersperse ",\n " . map fst $ colMap) + where + fullColName tName cName = tName <> "." <> cName + + asName cName sql = "(" <> sql <> ")" <> " as " <> cName + + joinClausePreds table oTableName = + fmap (\(ForeignKey _ colPairs) -> + Text.concat . intersperse " AND " + . map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2) + $ colPairs ) + . find (\cons -> case cons of + ForeignKey tName _ -> tName == oTableName + _ -> False) + . tableConstraints + $ table diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index 4017ef7..fb5ef12 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -26,12 +26,19 @@ data Table = Table , tableConstraints :: ![TableConstraint] } deriving (Eq, Show) -data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year +data TimeUnit = Second | Minute | Hour | Day | Week deriving (Eq, Enum, Show) timeUnitName :: TimeUnit -> Text timeUnitName = T.toLower . T.pack . show +timeUnitToSeconds :: TimeUnit -> Int +timeUnitToSeconds Second = 1 +timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second +timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute +timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour +timeUnitToSeconds Week = 7 * timeUnitToSeconds Day + data Fact = Fact { factName :: !TableName , factTableName :: !TableName