Extracts constants in code to settings.

pull/1/head
Abhinav Sarkar 2015-12-18 01:00:32 +05:30
parent 21497269ee
commit ea9e100f8f
6 changed files with 138 additions and 87 deletions

View File

@ -39,28 +39,28 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
| (_, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
factTableDefnSQLs = [ (Create
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
| (fact, table) <- factTables ]
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
| (_, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
factTableDefnSQLs = [ (Create
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
| (fact, table) <- factTables ]
dimTableInsertSQLs = [ (Populate
, tableName table
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
| (fact, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
dimTableInsertSQLs = [ (Populate
, tableName table
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
| (fact, tabs) <- dimTables
, table <- tabs
, table `notElem` envTables ]
fctTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
| (fact, table) <- factTables ]
factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
| (fact, table) <- factTables ]
sqls = concat [ dimTableDefnSQLs
, factTableDefnSQLs
, dimTableInsertSQLs
, fctTableInsertSQLs
, factTableInsertSQLs
]
sqlStr s = Text.unpack $ s <> ";\n"
sqlStr s = Text.unpack $ s <> ";\n"

View File

@ -34,6 +34,34 @@ settingsParser = let Settings {..} = defSettings
<> completeWith timeunits
<> help ("Time unit granularity for fact tables. Possible values: "
++ intercalate ", " timeunits))
<*> minorOption "avg-count-col-suffix"
settingAvgCountColumSuffix
"Suffix for average count columns"
<*> minorOption "avg-sum-col-suffix"
settingAvgSumColumnSuffix
"Suffix for average sum columns"
<*> minorOption "count-distinct-col-suffix"
settingCountDistinctColumSuffix
"Suffix for count distinct bucket columns"
<*> minorOption "dim-id-col-name"
settingDimTableIdColumnName
"Name of dimention table id columns"
<*> minorOption "dim-id-col-type"
settingDimTableIdColumnType
"Type of dimention table id columns"
<*> minorOption "fact-count-col-type"
settingFactCountColumnType
"Type of fact table count columns"
<*> minorOption "fact-infix"
settingFactInfix
"Infix for fact tables"
where
minorOption longDesc defValue helpTxt =
Text.pack <$> strOption (long longDesc
<> hidden
<> value (Text.unpack defValue)
<> showDefault
<> help helpTxt)
progArgsParser :: Parser ProgArgs
progArgsParser =
@ -47,10 +75,9 @@ progArgsParser =
<> help "Output directory")
parseArgs :: IO ProgArgs
parseArgs = execParser opts
where
opts = info (helper <*> progArgsParser)
(fullDesc
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
<> header "ringo - OLTP to OLAP schema transformer"
<> footer "Source: http://github.com/quintype/ringo")
parseArgs = execParser $
info (helper <*> progArgsParser)
(fullDesc
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
<> header "ringo - OLTP to OLAP schema transformer"
<> footer "Source: http://github.com/quintype/ringo")

View File

@ -18,33 +18,39 @@ extractFactTable fact = do
allDims <- extractAllDimensionTables fact
table <- asks $ fromJust . findTable (factTableName fact) . envTables
let intType = "integer"
let countColType = settingFactCountColumnType
dimIdColName = settingDimTableIdColumnName
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
columns = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ]
DimTime cName -> [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "integer" NotNull ]
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
FactCount cName -> [ Column cName intType NotNull ]
FactCount cName -> [ Column cName countColType NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull
, Column (averageSumColumnName cName) (sourceColumnType scName) NotNull
]
FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ]
FactAverage scName cName ->
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
, Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull
]
FactCountDistinct cName ->
[ Column (cName <> settingCountDistinctColumSuffix) (countColType <> "[]") NotNull ]
_ -> []
fks = flip map allDims $ \(_, Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull
in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")])
in ( Column colName (idColTypeToFKIdColType settingDimTableIdColumnType) colNullable
, ForeignKey tableName [(colName, dimIdColName)]
)
ukColNames =
(++ map (columnName . fst) fks)
. flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit)
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName
_ -> Nothing
return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
return Table { tableName =
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
, tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks
}

View File

@ -8,53 +8,55 @@ import Data.Function (on)
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Monoid ((<>))
import Data.List (nub, nubBy)
import Data.Text (Text)
import Ringo.Types
import Ringo.Utils
dimColumnName :: Text.Text -> ColumnName -> ColumnName
dimColumnName :: Text -> ColumnName -> ColumnName
dimColumnName dimName columnName =
fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName
timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"
timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName dimIdColName colName timeUnit =
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
averageCountColummName :: ColumnName -> ColumnName
averageCountColummName colName = colName <> "_count"
factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimIdColName dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName
averageSumColumnName :: ColumnName -> ColumnName
averageSumColumnName colName = colName <> "_sum"
extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factInfix factName timeUnit =
factPrefix <> factName <> factInfix <> timeUnitName timeUnit
countDistinctColumnName :: ColumnName -> ColumnName
countDistinctColumnName colName = colName <> "_hll"
factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName
factDimFKIdColumnName dimPrefix dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id"
extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factName timeUnit =
factPrefix <> factName <> "_by_" <> timeUnitName timeUnit
idColTypeToFKIdColType :: Text -> Text
idColTypeToFKIdColType typ = case Text.toLower typ of
"serial" -> "integer"
"smallserial" -> "smallint"
"bigserial" -> "bigint"
_ -> typ
extractDimensionTables :: Fact -> Reader Env [Table]
extractDimensionTables fact = do
settings <- asks envSettings
tables <- asks envTables
prefix <- settingDimPrefix <$> asks envSettings
let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals prefix (tableColumns table)
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where
dimsFromIds tables =
flip mapMaybe (factColumns fact) $ \fcol -> case fcol of
DimId d _ -> findTable d tables
_ -> Nothing
dimsFromVals prefix tableColumns =
map (\(dim, cols) -> Table { tableName = prefix <> dim
, tableColumns = Column "id" "serial" NotNull : cols
, tableConstraints = [ PrimaryKey "id"
, UniqueKey (map columnName cols)
]
})
dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) ->
Table { tableName = settingDimPrefix <> dim
, tableColumns =
Column settingDimTableIdColumnName settingDimTableIdColumnType NotNull : cols
, tableConstraints = [ PrimaryKey settingDimTableIdColumnName
, UniqueKey (map columnName cols)
]
})
. Map.toList
. Map.mapWithKey (\dim ->
map (\col@Column {..} -> col { columnName = dimColumnName dim columnName })

View File

@ -52,12 +52,12 @@ factTableDefnSQL fact table = do
allDims <- extractAllDimensionTables fact
let factCols = flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just $ timeUnitColumnName cName settingTimeUnit
DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
NoDimId cName -> Just cName
_ -> Nothing
dimCols = flip map allDims $ \(_, Table {..}) ->
factDimFKIdColumnName settingDimPrefix tableName
factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
indexSQLs = flip map (factCols ++ dimCols) $ \col ->
"CREATE INDEX ON " <> tableName table <> " USING btree (" <> col <> ")"
@ -83,14 +83,15 @@ dimensionTableInsertSQL fact dimTableName = do
factTableInsertSQL :: Fact -> Reader Env Text
factTableInsertSQL fact = do
let fTableName = factTableName fact
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
let table = fromJust . findTable fTableName $ tables
let fTableName = factTableName fact
Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact
tables <- asks envTables
let table = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
let timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName cName settingTimeUnit
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")")
@ -99,14 +100,14 @@ factTableInsertSQL fact = do
NoDimId cName -> [ (cName, fullColName fTableName cName) ]
FactCount cName -> [ (cName, "count(*)") ]
FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ]
FactAverage scName cName -> [ ( averageCountColummName cName
FactAverage scName cName -> [ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColName fTableName scName <> ")")
, ( averageSumColumnName cName
, ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColName fTableName scName <> ")") ]
_ -> []
dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
factSourceTableName = factTableName dimFact
insertSQL =
if factTable `elem` tables
@ -116,7 +117,7 @@ factTableInsertSQL fact = do
map (\(c1, c2) ->
fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2)
$ dimColumnMapping settingDimPrefix dimFact tableName
in "SELECT id FROM " <> tableName <> "\nWHERE "
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
in (colName, insertSQL)
@ -128,7 +129,8 @@ factTableInsertSQL fact = do
. map (\(dimFact, _) -> factTableName dimFact)
$ allDims
return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
return $ "INSERT INTO "
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
<> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)"
<> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap)
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)

View File

@ -5,7 +5,7 @@ import qualified Data.Text as Text
type ColumnName = Text
type ColumnType = Text
type TableName = Text
type TableName = Text
data Nullable = Null | NotNull deriving (Eq, Enum, Show)
@ -67,25 +67,39 @@ factColumnName (FactAverage cName _) = Just cName
factColumnName (FactCountDistinct _) = Nothing
data Settings = Settings
{ settingDimPrefix :: !Text
, settingFactPrefix :: !Text
, settingTimeUnit :: !TimeUnit
{ settingDimPrefix :: !Text
, settingFactPrefix :: !Text
, settingTimeUnit :: !TimeUnit
, settingAvgCountColumSuffix :: !Text
, settingAvgSumColumnSuffix :: !Text
, settingCountDistinctColumSuffix :: !Text
, settingDimTableIdColumnName :: !Text
, settingDimTableIdColumnType :: !Text
, settingFactCountColumnType :: !Text
, settingFactInfix :: !Text
} deriving (Eq, Show)
defSettings :: Settings
defSettings = Settings
{ settingDimPrefix = "dim_"
, settingFactPrefix = "fact_"
, settingTimeUnit = Minute
{ settingDimPrefix = "dim_"
, settingFactPrefix = "fact_"
, settingTimeUnit = Minute
, settingAvgCountColumSuffix = "_count"
, settingAvgSumColumnSuffix = "_sum"
, settingCountDistinctColumSuffix = "_hll"
, settingDimTableIdColumnName = "id"
, settingDimTableIdColumnType = "serial"
, settingFactCountColumnType = "integer"
, settingFactInfix = "_by_"
}
data ValidationError = MissingTable TableName
| MissingFact TableName
| MissingColumn TableName ColumnName
data ValidationError = MissingTable !TableName
| MissingFact !TableName
| MissingColumn !TableName !ColumnName
deriving (Eq, Show)
data Env = Env
{ envTables :: [Table]
, envFacts :: [Fact]
, envSettings :: Settings
{ envTables :: ![Table]
, envFacts :: ![Fact]
, envSettings :: !Settings
} deriving (Eq, Show)