Adds fact table insert SQL generation.

pull/1/head
Abhinav Sarkar 2015-12-16 16:57:10 +05:30
parent c39916704e
commit 900b4b7488
4 changed files with 123 additions and 25 deletions

View File

@ -4,10 +4,8 @@ module Ringo.Extractor
, extractFactTable
) where
import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks)
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Maybe (mapMaybe, fromJust)
import Data.Monoid ((<>))
import Ringo.Extractor.Internal
@ -24,31 +22,29 @@ extractFactTable fact = do
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
columns = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ Column (timeUnitColName cName settingTimeUnit) intType NotNull ]
DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ]
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
FactCount cName -> [ Column cName intType NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactAverage scName cName -> [ Column (cName <> "_count") intType NotNull
, Column (cName <> "_sum") (sourceColumnType scName) NotNull
FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull
, Column (averageSumColumnName cName) (sourceColumnType scName) NotNull
]
FactCountDistinct cName -> [ Column (cName <> "_hll") (intType <> "[]") NotNull ]
FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ]
_ -> []
fks = flip map allDims $ \Table { .. } ->
let colName = fromMaybe tableName (Text.stripPrefix settingDimPrefix tableName) <> "_id"
fks = flip map allDims $ \(_, Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName
colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull
in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")])
ukColNames =
(++ map (columnName . fst) fks)
. flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColName cName settingTimeUnit)
DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit)
NoDimId cName -> Just cName
_ -> Nothing
return Table { tableName = settingFactPrefix <> factName fact
return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
, tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks
}
where
timeUnitColName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"

View File

@ -4,9 +4,10 @@ import qualified Data.Map as Map
import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks)
import Data.Function (on)
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Monoid ((<>))
import Data.List (nub)
import Data.List (nub, nubBy)
import Ringo.Types
import Ringo.Utils
@ -15,6 +16,26 @@ dimColumnName :: Text.Text -> ColumnName -> ColumnName
dimColumnName dimName columnName =
fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName
timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"
averageCountColummName :: ColumnName -> ColumnName
averageCountColummName colName = colName <> "_count"
averageSumColumnName :: ColumnName -> ColumnName
averageSumColumnName colName = colName <> "_sum"
countDistinctColumnName :: ColumnName -> ColumnName
countDistinctColumnName colName = colName <> "_hll"
factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id"
extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factName timeUnit =
factPrefix <> factName <> "_by_" <> timeUnitName timeUnit
extractDimensionTables :: Fact -> Reader Env [Table]
extractDimensionTables fact = do
tables <- asks envTables
@ -47,11 +68,11 @@ extractDimensionTables fact = do
. factColumns
$ fact
extractAllDimensionTables :: Fact -> Reader Env [Table]
extractAllDimensionTables :: Fact -> Reader Env [(Fact, Table)]
extractAllDimensionTables fact = do
myDims <- extractDimensionTables fact
myDims <- map (fact,) <$> extractDimensionTables fact
parentDims <- concat <$> mapM extract (factParentNames fact)
return . nub $ myDims ++ parentDims
return . nubBy ((==) `on` snd) $ myDims ++ parentDims
where
extract fName = do
facts <- asks envFacts

View File

@ -1,18 +1,20 @@
module Ringo.Generator
( tableDefnSQL
, dimensionTableInsertSQL
, factTableInsertSQL
) where
import qualified Data.Text as Text
import Control.Monad.Reader (Reader, asks)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
import Data.List (intersperse, nub, find)
import Data.Maybe (fromJust, mapMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import Ringo.Extractor.Internal
import Ringo.Types
import Ringo.Utils
nullableDefnSQL :: Nullable -> Text
nullableDefnSQL Null = "NULL"
@ -43,16 +45,88 @@ tableDefnSQL Table {..} =
<> (Text.concat . intersperse ",\n" . map columnDefnSQL $ tableColumns)
<> "\n)"
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
dimensionTableInsertSQL fact dimTableName = do
dimPrefix <- settingDimPrefix <$> asks envSettings
let colMapping = flip mapMaybe (factColumns fact) $ \fCol -> case fCol of
dimColumnMapping :: Text -> Fact -> TableName -> [(ColumnName, ColumnName)]
dimColumnMapping dimPrefix fact dimTableName =
flip mapMaybe (factColumns fact) $ \fCol -> case fCol of
DimVal dName cName | dimPrefix <> dName == dimTableName ->
Just (dimColumnName dName cName, cName)
_ -> Nothing
dimensionTableInsertSQL :: Fact -> TableName -> Reader Env Text
dimensionTableInsertSQL fact dimTableName = do
dimPrefix <- settingDimPrefix <$> asks envSettings
let colMapping = dimColumnMapping dimPrefix fact dimTableName
return $ "INSERT INTO " <> dimTableName <> " (\n"
<> colNamesString (map fst colMapping)
<> "\n) SELECT DISTINCT \n"
<> colNamesString (map snd colMapping)
<> "\nFROM " <> factTableName fact
factTableInsertSQL :: Fact -> Reader Env Text
factTableInsertSQL fact= do
let fTableName = factTableName fact
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
let table = fromJust . findTable fTableName $ tables
let timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName cName settingTimeUnit
in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")")
factColMap = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ timeUnitColumnInsertSQL cName ]
NoDimId cName -> [ (cName, fullColName fTableName cName) ]
FactCount cName -> [ (cName, "count(*)") ]
FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ]
FactAverage scName cName -> [ ( averageCountColummName cName
, "count(" <> fullColName fTableName scName <> ")")
, ( averageSumColumnName cName
, "sum(" <> fullColName fTableName scName <> ")") ]
_ -> []
dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName
factSourceTableName = factTableName dimFact
insertSQL =
if factTable `elem` tables
then fullColName factSourceTableName colName
else
let dimLookupWhereClauses =
map (\(c1, c2) ->
fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2)
$ dimColumnMapping settingDimPrefix dimFact tableName
in "SELECT id FROM " <> tableName <> "\nWHERE "
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
in (colName, insertSQL)
colMap = map (\(cName, sql) -> (cName, asName cName sql)) $ factColMap ++ dimColMap
joinClauses =
mapMaybe (\tName -> (\p -> "LEFT JOIN " <> tName <> " ON "<> p) <$> joinClausePreds table tName)
. nub
. map (\(dimFact, _) -> factTableName dimFact)
$ allDims
return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
<> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)"
<> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap)
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
<> "\nGROUP BY \n" <> Text.concat (intersperse ",\n " . map fst $ colMap)
where
fullColName tName cName = tName <> "." <> cName
asName cName sql = "(" <> sql <> ")" <> " as " <> cName
joinClausePreds table oTableName =
fmap (\(ForeignKey _ colPairs) ->
Text.concat . intersperse " AND "
. map (\(c1, c2) -> fullColName (tableName table) c1 <> " = " <> fullColName oTableName c2)
$ colPairs )
. find (\cons -> case cons of
ForeignKey tName _ -> tName == oTableName
_ -> False)
. tableConstraints
$ table

View File

@ -26,12 +26,19 @@ data Table = Table
, tableConstraints :: ![TableConstraint]
} deriving (Eq, Show)
data TimeUnit = Second | Minute | Hour | Day | Week | Month | Year
data TimeUnit = Second | Minute | Hour | Day | Week
deriving (Eq, Enum, Show)
timeUnitName :: TimeUnit -> Text
timeUnitName = T.toLower . T.pack . show
timeUnitToSeconds :: TimeUnit -> Int
timeUnitToSeconds Second = 1
timeUnitToSeconds Minute = 60 * timeUnitToSeconds Second
timeUnitToSeconds Hour = 60 * timeUnitToSeconds Minute
timeUnitToSeconds Day = 24 * timeUnitToSeconds Hour
timeUnitToSeconds Week = 7 * timeUnitToSeconds Day
data Fact = Fact
{ factName :: !TableName
, factTableName :: !TableName