Adds fact table insert SQL generation.

pull/1/head
Abhinav Sarkar 2015-12-16 16:57:10 +05:30
parent c39916704e
commit 900b4b7488
4 changed files with 123 additions and 25 deletions

View File

@ -4,10 +4,8 @@ module Ringo.Extractor
, extractFactTable , extractFactTable
) where ) where
import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.Maybe (mapMaybe, fromMaybe, fromJust) import Data.Maybe (mapMaybe, fromJust)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
@ -24,31 +22,29 @@ extractFactTable fact = do
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
columns = flip concatMap (factColumns fact) $ \col -> case col of columns = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ Column (timeUnitColName cName settingTimeUnit) intType NotNull ] DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ]
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table] NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
FactCount cName -> [ Column cName intType NotNull ] FactCount cName -> [ Column cName intType NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactAverage scName cName -> [ Column (cName <> "_count") intType NotNull FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull
, Column (cName <> "_sum") (sourceColumnType scName) NotNull , Column (averageSumColumnName cName) (sourceColumnType scName) NotNull
] ]
FactCountDistinct cName -> [ Column (cName <> "_hll") (intType <> "[]") NotNull ] FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ]
_ -> [] _ -> []
fks = flip map allDims $ \Table { .. } -> fks = flip map allDims $ \(_, Table {..}) ->
let colName = fromMaybe tableName (Text.stripPrefix settingDimPrefix tableName) <> "_id" let colName = factDimFKIdColumnName settingDimPrefix tableName
colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull
in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")]) in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")])
ukColNames = ukColNames =
(++ map (columnName . fst) fks) (++ map (columnName . fst) fks)
. flip mapMaybe (factColumns fact) $ \col -> case col of . flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColName cName settingTimeUnit) DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit)
NoDimId cName -> Just cName NoDimId cName -> Just cName
_ -> Nothing _ -> Nothing
return Table { tableName = settingFactPrefix <> factName fact return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
, tableColumns = columns ++ map fst fks , tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks , tableConstraints = UniqueKey ukColNames : map snd fks
} }
where
timeUnitColName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"

View File

@ -4,9 +4,10 @@ import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.Function (on)
import Data.Maybe (mapMaybe, fromMaybe, fromJust) import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.List (nub) import Data.List (nub, nubBy)
import Ringo.Types import Ringo.Types
import Ringo.Utils import Ringo.Utils
@ -15,6 +16,26 @@ dimColumnName :: Text.Text -> ColumnName -> ColumnName
dimColumnName dimName columnName = dimColumnName dimName columnName =
fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName
timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"
averageCountColummName :: ColumnName -> ColumnName
averageCountColummName colName = colName <> "_count"
averageSumColumnName :: ColumnName -> ColumnName
averageSumColumnName colName = colName <> "_sum"
countDistinctColumnName :: ColumnName -> ColumnName
countDistinctColumnName colName = colName <> "_hll"
factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id"
extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factName timeUnit =
factPrefix <> factName <> "_by_" <> timeUnitName timeUnit
extractDimensionTables :: Fact -> Reader Env [Table] extractDimensionTables :: Fact -> Reader Env [Table]
extractDimensionTables fact = do extractDimensionTables fact = do
tables <- asks envTables tables <- asks envTables
@ -47,12 +68,12 @@ extractDimensionTables fact = do
. factColumns . factColumns
$ fact $ fact
extractAllDimensionTables :: Fact -> Reader Env [Table] extractAllDimensionTables :: Fact -> Reader Env [(Fact, Table)]
extractAllDimensionTables fact = do extractAllDimensionTables fact = do
myDims <- extractDimensionTables fact myDims <- map (fact,) <$> extractDimensionTables fact
parentDims <- concat <$> mapM extract (factParentNames fact) parentDims <- concat <$> mapM extract (factParentNames fact)
return . nub $ myDims ++ parentDims return . nubBy ((==) `on` snd) $ myDims ++ parentDims
where where
extract fName = do extract fName = do
facts <- asks envFacts facts <- asks envFacts
extractAllDimensionTables . fromJust . findFact fName $ facts extractAllDimensionTables . fromJust . findFact fName $ facts

View File

@ -1,18 +1,20 @@
module Ringo.Generator module Ringo.Generator
( tableDefnSQL ( tableDefnSQL
, dimensionTableInsertSQL , dimensionTableInsertSQL
, factTableInsertSQL
) where ) where
import qualified Data.Text as Text import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks) import Control.Monad.Reader (Reader, asks)
import Data.List (intersperse) import Data.List (intersperse, nub, find)
import Data.Maybe (mapMaybe) import Data.Maybe (fromJust, mapMaybe)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.Text (Text) import Data.Text (Text)
import Ringo.Extractor.Internal import Ringo.Extractor.Internal
import Ringo.Types import Ringo.Types
import Ringo.Utils
nullableDefnSQL :: Nullable -> Text nullableDefnSQL :: Nullable -> Text
nullableDefnSQL Null = "NULL" nullableDefnSQL Null = "NULL"
@ -43,16 +45,88 @@ tableDefnSQL Table {..} =
<> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns) <> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns)
<> "\n)" <> "\n)"
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
flip mapMaybe (factColumns fact) $ \fCol -> case fCol of
DimVal dName cName | dimPrefix <> dName == dimTableName ->
Just (dimColumnName dName cName, cName)
_ -> Nothing
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
dimensionTableInsertSQL fact dimTableName = do dimensionTableInsertSQL fact dimTableName = do
dimPrefix <- settingDimPrefix <$> asks envSettings dimPrefix <- settingDimPrefix <$> asks envSettings
let colMapping = flip mapMaybe (factColumns fact) $ \fCol -> case fCol of let colMapping = dimColumnMapping dimPrefix fact dimTableName
DimVal dName cName | dimPrefix <> dName == dimTableName ->
Just (dimColumnName dName cName, cName)
_ -> Nothing
return $ "INSERT INTO " <> dimTableName <> " (\n" return $ "INSERT INTO " <> dimTableName <> " (\n"
<> colNamesString (map fst colMapping) <> colNamesString (map fst colMapping)
<> "\n) SELECT DISTINCT \n" <> "\n) SELECT DISTINCT \n"
<> colNamesString (map snd colMapping) <> colNamesString (map snd colMapping)
<> "\nFROM " <> factTableName fact <> "\nFROM " <> factTableName fact
factTableInsertSQL :: Fact -> Reader Env Text
factTableInsertSQL fact= do
let fTableName = factTableName fact
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
let table = fromJust . findTable fTableName $ tables
let timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName cName settingTimeUnit
in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")")
factColMap = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ (cName, fullColName fTableName cName) ]
FactCount cName -> [ (cName, "count(*)") ]
FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ]
FactAverage scName cName -> [ ( averageCountColummName cName
, "count(" <> fullColName fTableName scName <> ")")
, ( averageSumColumnName cName
, "sum(" <> fullColName fTableName scName <> ")") ]
_ -> []
dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName
factSourceTableName = factTableName dimFact
insertSQL =
if factTable `elem` tables
then fullColName factSourceTableName colName
else
let dimLookupWhereClauses =
map (\(c1, c2) ->
fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2)
$ dimColumnMapping settingDimPrefix dimFact tableName
in "SELECT id FROM " <> tableName <> "\nWHERE "
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
in (colName, insertSQL)
colMap = map (\(cName, sql) -> (cName, asName cName sql)) $ factColMap ++ dimColMap
joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> " ON "<> p) <$> joinClausePreds table tName)
. nub
. map (\(dimFact, _) -> factTableName dimFact)
$ allDims
return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
<> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)"
<> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap)
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
<> "\nGROUP BY \n" <> Text.concat (intersperse ",\n " . map fst $ colMap)
where
fullColName tName cName = tName <> "." <> cName
asName cName sql = "(" <> sql <> ")" <> " as " <> cName
joinClausePreds table oTableName =
fmap (\(ForeignKey _ colPairs) ->
Text.concat . intersperse " AND "
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
$ colPairs )
. find (\cons -> case cons of
ForeignKey tName _ -> tName == oTableName
_ -> False)
. tableConstraints
$ table

View File

@ -26,12 +26,19 @@ data Table = Table
, tableConstraints :: ![TableConstraint] , tableConstraints :: ![TableConstraint]
} deriving (Eq, Show) } deriving (Eq, Show)
data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show) deriving (Eq, Enum, Show)
timeUnitName :: TimeUnit -> Text timeUnitName :: TimeUnit -> Text
timeUnitName = T.toLower . T.pack . show timeUnitName = T.toLower . T.pack . show
timeUnitToSeconds :: TimeUnit -> Int
timeUnitToSeconds Second = 1
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
data Fact = Fact data Fact = Fact
{ factName :: !TableName { factName :: !TableName
, factTableName :: !TableName , factTableName :: !TableName